1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 
14 #include "./vp9_rtcd.h"
15 #include "./vpx_dsp_rtcd.h"
16 
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/mem.h"
20 #include "vpx_ports/system_state.h"
21 
22 #include "vp9/common/vp9_common.h"
23 #include "vp9/common/vp9_entropy.h"
24 #include "vp9/common/vp9_entropymode.h"
25 #include "vp9/common/vp9_idct.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_scan.h"
32 #include "vp9/common/vp9_seg_common.h"
33 
34 #include "vp9/encoder/vp9_cost.h"
35 #include "vp9/encoder/vp9_encodemb.h"
36 #include "vp9/encoder/vp9_encodemv.h"
37 #include "vp9/encoder/vp9_encoder.h"
38 #include "vp9/encoder/vp9_mcomp.h"
39 #include "vp9/encoder/vp9_quantize.h"
40 #include "vp9/encoder/vp9_ratectrl.h"
41 #include "vp9/encoder/vp9_rd.h"
42 #include "vp9/encoder/vp9_rdopt.h"
43 #include "vp9/encoder/vp9_aq_variance.h"
44 
// Reference-frame bit masks. Each mask sets bit (1 << frame) for every
// reference-frame value other than the one the macro is named after.
#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

// Bit for ALTREF_FRAME plus bit 0.
// NOTE(review): bit 0 presumably stands for INTRA_FRAME — confirm against the
// MV_REFERENCE_FRAME enum values.
#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)

// Tuning constants. NOTE(review): their users are outside this part of the
// file; see the mode-search code for their exact meaning.
#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8
56 
// One entry of the mode search order: a prediction mode together with the
// reference frame pair it is tried with. Single-reference and intra entries
// use NONE in the second slot (see vp9_mode_order[]).
typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

// One entry of the reference search order: a reference frame pair only
// (see vp9_ref_order[]).
typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
63 
// Accumulator passed (as the opaque callback argument) to block_rd_txfm()
// while it visits every transform block of one plane.
struct rdcost_block_args {
  const VP9_COMP *cpi;  // encoder instance, forwarded to the distortion helpers
  MACROBLOCK *x;        // macroblock being evaluated
  // Above/left entropy contexts, indexed by blk_col / blk_row. After each
  // transform block they are set to 1 if the block has a nonzero eob, else 0.
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  // Running totals over the transform blocks visited so far.
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  // RD cost budget: once this_rd exceeds it, exit_early is set.
  int64_t best_rd;
  // When nonzero, block_rd_txfm() returns immediately for remaining blocks.
  int exit_early;
  // Forwarded to cost_coeffs() to select the approximate costing path.
  int use_fast_coef_costing;
  // Scan order whose scan/neighbors tables are handed to cost_coeffs().
  const scan_order *so;
  // Stays nonzero only while every visited block has eob == 0.
  uint8_t skippable;
};
79 
// Index in vp9_mode_order[] of the last single-reference NEWMV entry
// ({ NEWMV, { GOLDEN_FRAME, NONE } }).
#define LAST_NEW_MV_INDEX 6
// Table of (prediction mode, reference frame pair) combinations, in table
// order. Entries with NONE as the second reference are single-reference or
// intra; entries with ALTREF_FRAME as the second reference are compound.
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV.
  { NEARESTMV, { LAST_FRAME, NONE } },
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },

  { DC_PRED, { INTRA_FRAME, NONE } },

  // Single-reference NEWMV (indices 4..LAST_NEW_MV_INDEX).
  { NEWMV, { LAST_FRAME, NONE } },
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },

  // Single-reference NEARMV.
  { NEARMV, { LAST_FRAME, NONE } },
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },

  // Single-reference ZEROMV.
  { ZEROMV, { LAST_FRAME, NONE } },
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },

  // Compound NEARESTMV.
  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { TM_PRED, { INTRA_FRAME, NONE } },

  // Compound NEARMV / NEWMV.
  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  // Compound ZEROMV.
  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  // Remaining intra prediction modes.
  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
};
122 
// Table of reference frame pairs, in table order: the three single
// references, the two compound pairs (second reference ALTREF_FRAME), then
// intra.
static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  { { LAST_FRAME, NONE } },           { { GOLDEN_FRAME, NONE } },
  { { ALTREF_FRAME, NONE } },         { { LAST_FRAME, ALTREF_FRAME } },
  { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } },
};
128 
// Exchanges coefficient buffer slots |m| and |n| of |ctx| for every plane in
// [min_plane, max_plane). On return the macroblock's coeff/qcoeff/dqcoeff/eobs
// pointers refer to the buffers that were in slot |m| on entry, and those
// same buffers now live in slot |n|.
static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int plane;

  for (plane = min_plane; plane < max_plane; ++plane) {
    struct macroblock_plane *const mb_plane = &x->plane[plane];
    struct macroblockd_plane *const mbd_plane = &x->e_mbd.plane[plane];

    // Each field is rotated independently, using the macroblock pointer as
    // the temporary: mb <- slot m, slot m <- slot n, slot n <- mb.
    mb_plane->coeff = ctx->coeff_pbuf[plane][m];
    ctx->coeff_pbuf[plane][m] = ctx->coeff_pbuf[plane][n];
    ctx->coeff_pbuf[plane][n] = mb_plane->coeff;

    mb_plane->qcoeff = ctx->qcoeff_pbuf[plane][m];
    ctx->qcoeff_pbuf[plane][m] = ctx->qcoeff_pbuf[plane][n];
    ctx->qcoeff_pbuf[plane][n] = mb_plane->qcoeff;

    mbd_plane->dqcoeff = ctx->dqcoeff_pbuf[plane][m];
    ctx->dqcoeff_pbuf[plane][m] = ctx->dqcoeff_pbuf[plane][n];
    ctx->dqcoeff_pbuf[plane][n] = mbd_plane->dqcoeff;

    mb_plane->eobs = ctx->eobs_pbuf[plane][m];
    ctx->eobs_pbuf[plane][m] = ctx->eobs_pbuf[plane][n];
    ctx->eobs_pbuf[plane][n] = mb_plane->eobs;
  }
}
153 
// Models the rate and distortion of a block of size |bsize| from the
// prediction error alone; no transform or quantization is run here.
//
// Each plane is split into units of its largest transform size. For every
// unit the variance function returns variance and SSE; the SSE is stored in
// x->bsse[] and the pair is compared against thresholds derived from
// p->quant_thred[] to fill x->skip_txfm[]. Entries are indexed as
// (plane << 2) + (row << 1) + col.
//
// Outputs:
//   *out_rate_sum - modelled rate summed over all planes.
//   *out_dist_sum - modelled distortion summed over all planes, times 16.
//   *skip_txfm_sb - 1 only if every unit of every plane passed the low-error
//                   test (which is only evaluated when !x->select_tx_size).
//   *skip_sse_sb  - SSE summed over all planes, times 16.
// Side effects: writes x->bsse[], x->skip_txfm[] and x->pred_sse[ref], the
// latter receiving the plane-0 SSE.
static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
                            MACROBLOCKD *xd, int *out_rate_sum,
                            int64_t *out_dist_sum, int *skip_txfm_sb,
                            int64_t *skip_sse_sb) {
  // Note our transform coeffs are 8 times an orthogonal transform.
  // Hence quantizer step is also 8 times. To get effective quantizer
  // we need to divide by 8 before sending to modeling function.
  int i;
  int64_t rate_sum = 0;
  int64_t dist_sum = 0;
  const int ref = xd->mi[0]->ref_frame[0];
  unsigned int sse;
  unsigned int var = 0;
  unsigned int sum_sse = 0;
  int64_t total_sse = 0;
  int skip_flag = 1;
  const int shift = 6;
  int rate;
  int64_t dist;
  // Right shift applied to dequant[1]: 3 normally, bd - 5 for high bit depth
  // buffers.
  const int dequant_shift =
#if CONFIG_VP9_HIGHBITDEPTH
      (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 :
#endif  // CONFIG_VP9_HIGHBITDEPTH
                                                    3;

  x->pred_sse[ref] = 0;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    struct macroblock_plane *const p = &x->plane[i];
    struct macroblockd_plane *const pd = &xd->plane[i];
    const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
    const TX_SIZE max_tx_size = max_txsize_lookup[bs];
    const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size];
    const int64_t dc_thr = p->quant_thred[0] >> shift;
    const int64_t ac_thr = p->quant_thred[1] >> shift;
    // The low thresholds are used to measure if the prediction errors are
    // low enough so that we can skip the mode search.
    const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2);
    const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2);
    // Number of units across / down the plane block.
    // NOTE(review): bh uses b_width_log2_lookup[unit_size] and the dst column
    // offset below uses lh; this is only equivalent if unit_size is square
    // (it comes from txsize_to_bsize[]) — confirm.
    int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]);
    int idx, idy;
    // log2 of the unit width / height in samples.
    int lw = b_width_log2_lookup[unit_size] + 2;
    int lh = b_height_log2_lookup[unit_size] + 2;

    sum_sse = 0;

    for (idy = 0; idy < bh; ++idy) {
      for (idx = 0; idx < bw; ++idx) {
        uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw);
        uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh);
        int block_idx = (idy << 1) + idx;
        int low_err_skip = 0;

        var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride,
                                        &sse);
        x->bsse[(i << 2) + block_idx] = sse;
        sum_sse += sse;

        x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE;
        if (!x->select_tx_size) {
          // Check if all ac coefficients can be quantized to zero.
          if (var < ac_thr || var == 0) {
            x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY;

            // Check if dc coefficient can be quantized to zero.
            if (sse - var < dc_thr || sse == var) {
              x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC;

              if (!sse || (var < low_ac_thr && sse - var < low_dc_thr))
                low_err_skip = 1;
            }
          }
        }

        // A single unit failing the low-error test clears the flag for the
        // whole block.
        if (skip_flag && !low_err_skip) skip_flag = 0;

        if (i == 0) x->pred_sse[ref] += sse;
      }
    }

    total_sse += sum_sse;

    // Fast approximate the modelling function.
    if (cpi->sf.simple_model_rd_from_var) {
      // This 64-bit |rate| shadows the function-scope int |rate|.
      int64_t rate;
      const int64_t square_error = sum_sse;
      int quantizer = (pd->dequant[1] >> dequant_shift);

      if (quantizer < 120)
        rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT);
      else
        rate = 0;
      dist = (square_error * quantizer) >> 8;
      rate_sum += rate;
      dist_sum += dist;
    } else {
      vp9_model_rd_from_var_lapndz(sum_sse, num_pels_log2_lookup[bs],
                                   pd->dequant[1] >> dequant_shift, &rate,
                                   &dist);
      rate_sum += rate;
      dist_sum += dist;
    }
  }

  *skip_txfm_sb = skip_flag;
  *skip_sse_sb = total_sse << 4;
  *out_rate_sum = (int)rate_sum;
  *out_dist_sum = dist_sum << 4;
}
264 
265 #if CONFIG_VP9_HIGHBITDEPTH
// Sums the squared difference between |coeff| and |dqcoeff| over
// |block_size| entries and returns it; the sum of squared |coeff| values is
// written to |*ssz|. Both sums are rounded and shifted right by
// 2 * (bd - 8) bits before being handed back.
int64_t vp9_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  // Half of the divisor, so that the shift rounds to nearest.
  const int rounding = (shift > 0) ? (1 << (shift - 1)) : 0;
  int64_t sum_err = 0;
  int64_t sum_sq = 0;
  int k;

  for (k = 0; k < block_size; ++k) {
    const int64_t c = coeff[k];
    const int64_t d = coeff[k] - dqcoeff[k];
    sum_err += d * d;
    sum_sq += c * c;
  }
  assert(sum_err >= 0 && sum_sq >= 0);

  *ssz = (sum_sq + rounding) >> shift;
  return (sum_err + rounding) >> shift;
}
286 
// 8-bit entry point of the high-bit-depth build; forwards to
// vp9_block_error_c() and returns its result unchanged.
int64_t vp9_highbd_block_error_8bit_c(const tran_low_t *coeff,
                                      const tran_low_t *dqcoeff,
                                      intptr_t block_size, int64_t *ssz) {
  // Note that the C versions of these 2 functions (vp9_block_error and
  // vp9_highbd_block_error_8bit) are the same, but the optimized assembly
  // routines are not compatible in the non high bitdepth configuration, so
  // they still cannot share the same name.
  const int64_t err = vp9_block_error_c(coeff, dqcoeff, block_size, ssz);
  return err;
}
296 
// Selects the block-error routine by bit depth: the 8-bit variant when
// |bd| is 8, the generic high-bit-depth variant otherwise.
static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff,
                                               const tran_low_t *dqcoeff,
                                               intptr_t block_size,
                                               int64_t *ssz, int bd) {
  if (bd != 8) {
    return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd);
  }
  return vp9_highbd_block_error_8bit(coeff, dqcoeff, block_size, ssz);
}
307 #endif  // CONFIG_VP9_HIGHBITDEPTH
308 
// Returns the sum of squared differences between |coeff| and |dqcoeff| over
// |block_size| entries, and writes the sum of squared |coeff| values to
// |*ssz|.
int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int i;
  int64_t error = 0, sqcoeff = 0;

  for (i = 0; i < block_size; i++) {
    // Widen before subtracting and multiplying: squaring in int overflows
    // (undefined behaviour) once a magnitude exceeds 46340.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
323 
// Returns the sum of squared differences between the 16-bit arrays |coeff|
// and |dqcoeff| over |block_size| entries. A non-positive |block_size|
// yields 0.
int64_t vp9_block_error_fp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             int block_size) {
  int i;
  int64_t error = 0;

  for (i = 0; i < block_size; i++) {
    // The difference of two int16_t values can reach 65535, whose square
    // does not fit in int; do the arithmetic in 64 bits.
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
336 
/* Number of coefficients in each coefficient band, per transform size. The
 * non-zero entries of a row add up to the coefficient count of that
 * transform (16, 64, 256, 1024).
 *
 * The trailing '0' is a terminator which is used inside cost_coeffs() to
 * decide whether to include the cost of a trailing EOB node or not (i.e. we
 * can skip this if the last coefficient in this transform block, e.g. the
 * 16th coefficient in a 4x4 block or the 64th coefficient in an 8x8 block,
 * was non-zero). Each row lists seven of its eight slots; the unlisted last
 * slot is zero-initialized. */
static const int16_t band_counts[TX_SIZES][8] = {
  { 1, 2, 3, 4, 3, 16 - 13, 0 },
  { 1, 2, 3, 4, 11, 64 - 21, 0 },
  { 1, 2, 3, 4, 11, 256 - 21, 0 },
  { 1, 2, 3, 4, 11, 1024 - 21, 0 },
};
// Returns the cost of coding the quantized coefficients of one transform
// block, as the sum of per-token costs from x->token_costs[] and
// vp9_get_token_cost().
//
//   x, plane, block - select the coefficients (p->qcoeff) and eob.
//   tx_size         - transform size; must agree with the mode info.
//   pt              - context used for the first (DC) token.
//   scan            - scan table mapping coding position to raster position.
//   nb              - neighbor table handed to get_coef_context().
//   use_fast_coef_costing - when nonzero, the context of each AC token is
//                     approximated from whether the previous token was zero
//                     instead of being derived from the neighbors.
static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size,
                       int pt, const int16_t *scan, const int16_t *nb,
                       int use_fast_coef_costing) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  const struct macroblock_plane *p = &x->plane[plane];
  const PLANE_TYPE type = get_plane_type(plane);
  // Starts at entry 1: band 0 holds only the DC coefficient, which is costed
  // separately before the AC loop.
  const int16_t *band_count = &band_counts[tx_size][1];
  const int eob = p->eobs[block];
  const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block);
  // Points at the cost table of the current band; advanced with ++ whenever
  // a band is exhausted.
  unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
      x->token_costs[tx_size][type][is_inter_block(mi)];
  uint8_t token_cache[32 * 32];
  int c, cost;
#if CONFIG_VP9_HIGHBITDEPTH
  const int *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
  const int *cat6_high_cost = vp9_get_high_cost_table(8);
#endif

  // Check for consistency of tx_size with mode info
  assert(type == PLANE_TYPE_Y
             ? mi->tx_size == tx_size
             : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size);

  if (eob == 0) {
    // single eob token
    cost = token_costs[0][0][pt][EOB_TOKEN];
    c = 0;
  } else {
    if (use_fast_coef_costing) {
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t prev_t;
      cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost);
      cost += (*token_costs)[0][pt][prev_t];

      token_cache[0] = vp9_pt_energy_class[prev_t];
      ++token_costs;

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];
        int16_t t;

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &t, cat6_high_cost);
        // !prev_t serves as both table indices here: 1 if the previous
        // token value was 0, else 0.
        cost += (*token_costs)[!prev_t][!prev_t][t];
        prev_t = t;
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
      }

      // eob token
      // band_left is zero only when the terminator entry was loaded, i.e.
      // every coefficient of the block was coded and no EOB is needed.
      if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN];

    } else {  // !use_fast_coef_costing
      int band_left = *band_count++;

      // dc token
      int v = qcoeff[0];
      int16_t tok;
      unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS];
      cost = vp9_get_token_cost(v, &tok, cat6_high_cost);
      cost += (*token_costs)[0][pt][tok];

      token_cache[0] = vp9_pt_energy_class[tok];
      ++token_costs;

      // Sub-table for the next token, selected by whether the token just
      // coded had value 0.
      tok_cost_ptr = &((*token_costs)[!tok]);

      // ac tokens
      for (c = 1; c < eob; c++) {
        const int rc = scan[c];

        v = qcoeff[rc];
        cost += vp9_get_token_cost(v, &tok, cat6_high_cost);
        pt = get_coef_context(nb, token_cache, c);
        cost += (*tok_cost_ptr)[pt][tok];
        token_cache[rc] = vp9_pt_energy_class[tok];
        if (!--band_left) {
          band_left = *band_count++;
          ++token_costs;
        }
        tok_cost_ptr = &((*token_costs)[!tok]);
      }

      // eob token
      if (band_left) {
        pt = get_coef_context(nb, token_cache, c);
        cost += (*token_costs)[0][pt][EOB_TOKEN];
      }
    }
  }

  return cost;
}
449 
num_4x4_to_edge(int plane_4x4_dim,int mb_to_edge_dim,int subsampling_dim,int blk_dim)450 static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim,
451                                   int subsampling_dim, int blk_dim) {
452   return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim;
453 }
454 
455 // Compute the pixel domain sum square error on all visible 4x4s in the
456 // transform block.
pixel_sse(const VP9_COMP * const cpi,const MACROBLOCKD * xd,const struct macroblockd_plane * const pd,const uint8_t * src,const int src_stride,const uint8_t * dst,const int dst_stride,int blk_row,int blk_col,const BLOCK_SIZE plane_bsize,const BLOCK_SIZE tx_bsize)457 static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd,
458                           const struct macroblockd_plane *const pd,
459                           const uint8_t *src, const int src_stride,
460                           const uint8_t *dst, const int dst_stride, int blk_row,
461                           int blk_col, const BLOCK_SIZE plane_bsize,
462                           const BLOCK_SIZE tx_bsize) {
463   unsigned int sse = 0;
464   const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
465   const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
466   const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
467   const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
468   int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
469                                             pd->subsampling_x, blk_col);
470   int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
471                                              pd->subsampling_y, blk_row);
472   if (tx_bsize == BLOCK_4X4 ||
473       (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
474     cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
475   } else {
476     const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf;
477     int r, c;
478     unsigned this_sse = 0;
479     int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
480     int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
481     sse = 0;
482     // if we are in the unrestricted motion border.
483     for (r = 0; r < max_r; ++r) {
484       // Skip visiting the sub blocks that are wholly within the UMV.
485       for (c = 0; c < max_c; ++c) {
486         vf_4x4(src + r * src_stride * 4 + c * 4, src_stride,
487                dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse);
488         sse += this_sse;
489       }
490     }
491   }
492   return sse;
493 }
494 
495 // Compute the squares sum squares on all visible 4x4s in the transform block.
sum_squares_visible(const MACROBLOCKD * xd,const struct macroblockd_plane * const pd,const int16_t * diff,const int diff_stride,int blk_row,int blk_col,const BLOCK_SIZE plane_bsize,const BLOCK_SIZE tx_bsize)496 static int64_t sum_squares_visible(const MACROBLOCKD *xd,
497                                    const struct macroblockd_plane *const pd,
498                                    const int16_t *diff, const int diff_stride,
499                                    int blk_row, int blk_col,
500                                    const BLOCK_SIZE plane_bsize,
501                                    const BLOCK_SIZE tx_bsize) {
502   int64_t sse;
503   const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
504   const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
505   const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
506   const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
507   int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
508                                             pd->subsampling_x, blk_col);
509   int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
510                                              pd->subsampling_y, blk_row);
511   if (tx_bsize == BLOCK_4X4 ||
512       (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
513     sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_bsize);
514   } else {
515     int r, c;
516     int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
517     int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
518     sse = 0;
519     // if we are in the unrestricted motion border.
520     for (r = 0; r < max_r; ++r) {
521       // Skip visiting the sub blocks that are wholly within the UMV.
522       for (c = 0; c < max_c; ++c) {
523         sse += (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, BLOCK_4X4);
524       }
525     }
526   }
527   return sse;
528 }
529 
// Computes the distortion (*out_dist) and the prediction SSE (*out_sse) of
// one transform block.
//
// When x->block_tx_domain is set, both are measured on transform
// coefficients: the block error between coeff and dqcoeff, and the sum of
// squared coeff values, each shifted right by 2 unless the transform is
// 32x32.
// Otherwise both are measured on pixels and scaled by 16: *out_sse is the
// SSE between source and the current contents of the dst buffer, and
// *out_dist is the SSE between source and a local reconstruction (dst plus
// the inverse transform of dqcoeff). If the block has no coefficients
// (eob == 0), *out_dist equals *out_sse.
static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
                       BLOCK_SIZE plane_bsize, int block, int blk_row,
                       int blk_col, TX_SIZE tx_size, int64_t *out_dist,
                       int64_t *out_sse) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];

  if (x->block_tx_domain) {
    // 16 << ss_txfrm_size is the number of coefficients in the block.
    const int ss_txfrm_size = tx_size << 1;
    int64_t this_sse;
    const int shift = tx_size == TX_32X32 ? 0 : 2;
    const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist = vp9_highbd_block_error_dispatch(
                    coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >>
                shift;
#else
    *out_dist =
        vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
        shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_sse = this_sse >> shift;

    if (x->skip_encode && !is_inter_block(xd->mi[0])) {
      // TODO(jingning): tune the model to better capture the distortion.
      // This |p| shadows the plane pointer declared at function scope.
      // NOTE(review): the product below is evaluated before it is widened to
      // int64_t — confirm it cannot overflow for large dequant values.
      const int64_t p =
          (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >>
#if CONFIG_VP9_HIGHBITDEPTH
          (shift + 2 + (bd - 8) * 2);
#else
          (shift + 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      *out_dist += (p >> 4);
      *out_sse += p;
    }
  } else {
    const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
    // Side length of the transform block in samples.
    const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize];
    const int src_stride = p->src.stride;
    const int dst_stride = pd->dst.stride;
    const int src_idx = 4 * (blk_row * src_stride + blk_col);
    const int dst_idx = 4 * (blk_row * dst_stride + blk_col);
    const uint8_t *src = &p->src.buf[src_idx];
    const uint8_t *dst = &pd->dst.buf[dst_idx];
    const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
    const uint16_t *eob = &p->eobs[block];
    unsigned int tmp;

    tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
                    blk_col, plane_bsize, tx_bsize);
    *out_sse = (int64_t)tmp * 16;

    if (*eob) {
      // Scratch reconstruction buffer with a row stride of 32 (1024 entries).
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
      uint8_t *recon = (uint8_t *)recon16;
#else
      DECLARE_ALIGNED(16, uint8_t, recon[1024]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
        recon = CONVERT_TO_BYTEPTR(recon);
        // Copy the dst block into the scratch buffer, then add the inverse
        // transform of dqcoeff on top of it.
        vpx_highbd_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0,
                                 bs, bs, xd->bd);
        if (xd->lossless) {
          vp9_highbd_iwht4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
        } else {
          switch (tx_size) {
            case TX_4X4:
              vp9_highbd_idct4x4_add(dqcoeff, recon, 32, *eob, xd->bd);
              break;
            case TX_8X8:
              vp9_highbd_idct8x8_add(dqcoeff, recon, 32, *eob, xd->bd);
              break;
            case TX_16X16:
              vp9_highbd_idct16x16_add(dqcoeff, recon, 32, *eob, xd->bd);
              break;
            case TX_32X32:
              vp9_highbd_idct32x32_add(dqcoeff, recon, 32, *eob, xd->bd);
              break;
            default: assert(0 && "Invalid transform size");
          }
        }
      } else {
#endif  // CONFIG_VP9_HIGHBITDEPTH
        vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
        switch (tx_size) {
          case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
          case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
          case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break;
          case TX_4X4:
            // this is like vp9_short_idct4x4 but has a special case around
            // eob<=1, which is significant (not just an optimization) for
            // the lossless case.
            x->itxm_add(dqcoeff, recon, 32, *eob);
            break;
          default: assert(0 && "Invalid transform size"); break;
        }
#if CONFIG_VP9_HIGHBITDEPTH
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // Distortion is measured against the reconstruction, not dst.
      tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col,
                      plane_bsize, tx_bsize);
    }

    *out_dist = (int64_t)tmp * 16;
  }
}
643 
// Returns the coefficient cost of one transform block, taking the
// macroblock, scan tables and costing mode from |args|.
static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx,
                      struct rdcost_block_args *args) {
  const scan_order *const order = args->so;

  return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, order->scan,
                     order->neighbors, args->use_fast_coef_costing);
}
649 
// Per-transform-block callback: computes rate, distortion and SSE for one
// transform block and adds them to the running totals in |arg| (a
// struct rdcost_block_args).
//
// Intra blocks are encoded with vp9_encode_block_intra() first. Inter blocks
// whose transform size is the largest for the plane block consult
// x->skip_txfm[] and may skip the transform entirely or compute only the DC
// coefficient; all other inter blocks run the full transform and
// quantization.
//
// Sets args->exit_early, and returns without further updates, as soon as the
// accumulated RD cost exceeds args->best_rd. Once exit_early is set the
// function returns immediately on every later call.
static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
                          BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) {
  struct rdcost_block_args *args = arg;
  MACROBLOCK *const x = args->x;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  int64_t rd1, rd2, rd;
  int rate;
  int64_t dist;
  int64_t sse;
  const int coeff_ctx =
      combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);

  if (args->exit_early) return;

  if (!is_inter_block(mi)) {
    struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
                                       args->t_left, &mi->skip };
    vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                           &intra_arg);
    if (x->block_tx_domain) {
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    } else {
      // Pixel domain: SSE from the stored residual (src_diff), distortion
      // from source against the dst buffer; both scaled by 16.
      const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size];
      const struct macroblock_plane *const p = &x->plane[plane];
      const struct macroblockd_plane *const pd = &xd->plane[plane];
      const int src_stride = p->src.stride;
      const int dst_stride = pd->dst.stride;
      const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
      const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)];
      const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)];
      const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)];
      unsigned int tmp;
      sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col,
                                plane_bsize, tx_bsize);
#if CONFIG_VP9_HIGHBITDEPTH
      if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8))
        sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      sse = sse * 16;
      tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride,
                      blk_row, blk_col, plane_bsize, tx_bsize);
      dist = (int64_t)tmp * 16;
    }
  } else if (max_txsize_lookup[plane_bsize] == tx_size) {
    // x->skip_txfm[] and x->bsse[] are indexed by
    // (plane << 2) + transform block number within the plane.
    if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
        SKIP_TXFM_NONE) {
      // full forward transform and quantization
      vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
      if (x->block_qcoeff_opt)
        vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
      dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                 tx_size, &dist, &sse);
    } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
               SKIP_TXFM_AC_ONLY) {
      // compute DC coefficient
      tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
      tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
      vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize,
                         tx_size);
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
      if (x->plane[plane].eobs[block]) {
        // Reduce the distortion by the DC energy removed by coding the DC
        // coefficient: coeff[0]^2 - (coeff[0] - dqcoeff[0])^2.
        const int64_t orig_sse = (int64_t)coeff[0] * coeff[0];
        const int64_t resd_sse = coeff[0] - dqcoeff[0];
        int64_t dc_correct = orig_sse - resd_sse * resd_sse;
#if CONFIG_VP9_HIGHBITDEPTH
        dc_correct >>= ((xd->bd - 8) * 2);
#endif
        if (tx_size != TX_32X32) dc_correct >>= 2;

        dist = VPXMAX(0, sse - dc_correct);
      }
    } else {
      // SKIP_TXFM_AC_DC
      // skip forward transform
      x->plane[plane].eobs[block] = 0;
      sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
      dist = sse;
    }
  } else {
    // full forward transform and quantization
    vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
    if (x->block_qcoeff_opt)
      vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
    dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
               tx_size, &dist, &sse);
  }

  // Early out on distortion alone, before paying for the rate computation.
  rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
  if (args->this_rd + rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  rate = rate_block(plane, block, tx_size, coeff_ctx, args);
  args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0;
  // rd1: cost of coding the coefficients; rd2: cost of dropping them (zero
  // rate, distortion equal to the SSE).
  rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist);
  rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse);

  // TODO(jingning): temporarily enabled only for luma component
  rd = VPXMIN(rd1, rd2);
  if (plane == 0)
    x->zcoeff_blk[tx_size][block] =
        !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless);

  args->this_rate += rate;
  args->this_dist += dist;
  args->this_sse += sse;
  args->this_rd += rd;

  if (args->this_rd > args->best_rd) {
    args->exit_early = 1;
    return;
  }

  args->skippable &= !x->plane[plane].eobs[block];
}
770 
// Computes rate, distortion, SSE and skippability of one plane of the current
// block for a fixed transform size by running block_rd_txfm over every
// transform block of the plane.
//
// ref_best_rd is handed to the per-block callback as its RD budget. When the
// callback flags an early exit the outputs are set to sentinels:
// *rate = INT_MAX, *distortion = *sse = INT64_MAX, *skippable = 0.
// For plane 0 the mode info's tx_size is overwritten with tx_size.
static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                             int64_t *distortion, int *skippable, int64_t *sse,
                             int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
                             TX_SIZE tx_size, int use_fast_coef_casting) {
  MACROBLOCKD *const xd = &x->e_mbd;
  struct rdcost_block_args rd_args;

  // Start from an all-zero accumulator, then fill in the search parameters.
  vp9_zero(rd_args);
  rd_args.cpi = cpi;
  rd_args.x = x;
  rd_args.best_rd = ref_best_rd;
  rd_args.use_fast_coef_costing = use_fast_coef_casting;
  rd_args.skippable = 1;

  // Luma records the transform size under test in the mode info.
  if (plane == 0) xd->mi[0]->tx_size = tx_size;

  vp9_get_entropy_contexts(bsize, tx_size, &xd->plane[plane], rd_args.t_above,
                           rd_args.t_left);
  rd_args.so = get_scan(xd, tx_size, get_plane_type(plane), 0);

  vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm,
                                         &rd_args);

  if (rd_args.exit_early) {
    // The walk was abandoned; report "no valid cost".
    *rate = INT_MAX;
    *distortion = INT64_MAX;
    *sse = INT64_MAX;
    *skippable = 0;
    return;
  }

  *rate = rd_args.this_rate;
  *distortion = rd_args.this_dist;
  *sse = rd_args.this_sse;
  *skippable = rd_args.skippable;
}
805 
// Fixes the luma transform size to the largest one allowed by both the block
// size and the frame's tx_mode, stores it in the mode info, and evaluates the
// luma plane with it via txfm_rd_in_plane.
static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip, int64_t *sse,
                                   int64_t ref_best_rd, BLOCK_SIZE bs) {
  MODE_INFO *const mi = x->e_mbd.mi[0];
  const TX_SIZE bsize_limit = max_txsize_lookup[bs];
  const TX_SIZE mode_limit = tx_mode_to_biggest_tx_size[cpi->common.tx_mode];
  const TX_SIZE tx_size = VPXMIN(bsize_limit, mode_limit);

  mi->tx_size = tx_size;

  txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs,
                   tx_size, cpi->sf.use_fast_coef_costing);
}
820 
// Selects the luma transform size by rate-distortion search.
//
// When the frame uses TX_MODE_SELECT every size from the block's maximum down
// to TX_4X4 is evaluated; otherwise only the single size implied by tx_mode
// (clamped to the block's maximum) is evaluated. For each size n:
//   r[n][0] / rd[n][0]  - rate / RD cost without the tx-size signalling cost
//   r[n][1] / rd[n][1]  - rate / RD cost including the tx-size signalling cost
//   d[n], sse[n], s[n]  - distortion, SSE and skip flag from txfm_rd_in_plane
// The size with the smallest rd[n][1] wins. The winner is written to
// mi->tx_size and its rate/distortion/skip/sse are returned through the output
// pointers. *rate includes the tx-size cost only under TX_MODE_SELECT.
//
// If no candidate produced a valid cost, the outputs are those of the first
// evaluated size (start_tx), whose entries carry the INT_MAX / INT64_MAX
// sentinels written by txfm_rd_in_plane.
static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                                   int64_t *distortion, int *skip,
                                   int64_t *psse, int64_t ref_best_rd,
                                   BLOCK_SIZE bs) {
  const TX_SIZE max_tx_size = max_txsize_lookup[bs];
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
  int r[TX_SIZES][2], s[TX_SIZES];
  int64_t d[TX_SIZES], sse[TX_SIZES];
  int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX },
                              { INT64_MAX, INT64_MAX } };
  int n, m;
  int s0, s1;
  int64_t best_rd = INT64_MAX;
  TX_SIZE best_tx;
  int start_tx, end_tx;

  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
  assert(skip_prob > 0);
  // Cost of signalling skip = 0 and skip = 1 respectively.
  s0 = vp9_cost_bit(skip_prob, 0);
  s1 = vp9_cost_bit(skip_prob, 1);

  if (cm->tx_mode == TX_MODE_SELECT) {
    start_tx = max_tx_size;
    end_tx = 0;
  } else {
    TX_SIZE chosen_tx_size =
        VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
    start_tx = chosen_tx_size;
    end_tx = chosen_tx_size;
  }

  // Default to the first size that is actually evaluated. Its r/d/s/sse
  // entries are always written by the first loop iteration, so the outputs
  // below never read uninitialized array elements, and mi->tx_size never
  // exceeds what the frame's tx_mode permits.
  best_tx = (TX_SIZE)start_tx;

  for (n = start_tx; n >= end_tx; n--) {
    // Bits needed to signal transform size n given the block's maximum size.
    int r_tx_size = 0;
    for (m = 0; m <= n - (n == (int)max_tx_size); m++) {
      if (m == n)
        r_tx_size += vp9_cost_zero(tx_probs[m]);
      else
        r_tx_size += vp9_cost_one(tx_probs[m]);
    }
    txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
                     bs, n, cpi->sf.use_fast_coef_costing);
    r[n][1] = r[n][0];
    if (r[n][0] < INT_MAX) {
      r[n][1] += r_tx_size;
    }
    if (d[n] == INT64_MAX || r[n][0] == INT_MAX) {
      // txfm_rd_in_plane terminated early: no valid cost for this size.
      rd[n][0] = rd[n][1] = INT64_MAX;
    } else if (s[n]) {
      if (is_inter_block(mi)) {
        // Skipped inter block: only the skip flag is costed, and the tx-size
        // cost is removed from the reported rate again.
        rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        r[n][1] -= r_tx_size;
      } else {
        rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]);
        rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]);
      }
    } else {
      rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]);
      rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]);
    }

    // A non-skipped lossy inter block may still be cheaper coded as skipped.
    if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) {
      rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
      rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n]));
    }

    // Early termination in transform size search.
    if (cpi->sf.tx_size_search_breakout &&
        (rd[n][1] == INT64_MAX ||
         (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1))
      break;

    if (rd[n][1] < best_rd) {
      best_tx = n;
      best_rd = rd[n][1];
    }
  }
  mi->tx_size = best_tx;

  *distortion = d[mi->tx_size];
  *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT];
  *skip = s[mi->tx_size];
  *psse = sse[mi->tx_size];
}
909 
// Luma RD entry point: evaluates the luma plane of the current block and
// returns rate, distortion, skip flag and (optionally) SSE.
//
// psse may be NULL, in which case the SSE is written to a local scratch
// variable and discarded. The transform size is either forced to the largest
// allowed one (lossless coding or the USE_LARGESTALL search method) or picked
// by RD search.
static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                            int64_t *distortion, int *skip, int64_t *psse,
                            BLOCK_SIZE bs, int64_t ref_best_rd) {
  MACROBLOCKD *xd = &x->e_mbd;
  int64_t scratch_sse;
  int64_t *const sse_out = (psse != NULL) ? psse : &scratch_sse;
  const int force_largest =
      xd->lossless || cpi->sf.tx_size_search_method == USE_LARGESTALL;

  assert(bs == xd->mi[0]->sb_type);

  if (!force_largest) {
    choose_tx_size_from_rd(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                           bs);
    return;
  }

  choose_largest_tx_size(cpi, x, rate, distortion, skip, sse_out, ref_best_rd,
                         bs);
}
927 
// Returns 1 when the oblique intra mode `mode` should be skipped because the
// best intra mode found so far is not one of the two modes paired with it
// below; returns 0 for every other case (including all non-listed modes).
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D117_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D63_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D207_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D153_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
944 
// Picks the best intra prediction mode for one luma sub-block (made of 4x4
// transform blocks) located at (row, col), in 4x4 units, inside an 8x8 block.
//
// Every mode from DC_PRED to TM_PRED that passes the speed-feature masks is
// predicted, transformed, quantized and costed. A mode is abandoned as soon as
// its running RD cost reaches the best cost so far.
//
// Inputs/outputs:
//   best_mode       - in: best mode so far (used by the direction-mismatch
//                     skip); out: winning mode, if any mode beat rd_thresh.
//   bmode_costs     - per-mode signalling cost table.
//   a, l            - above/left entropy contexts; overwritten with the
//                     winning mode's contexts.
//   bestrate, bestratey, bestdistortion - total rate, coefficient-only rate
//                     and distortion of the winning mode.
//   rd_thresh       - RD cost a mode must beat to be accepted.
//
// Returns the best RD cost found; the value equals rd_thresh when no mode beat
// it. When a winner exists and x->skip_encode is not set, the winner's
// reconstruction is copied back into the destination buffer before returning.
static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row,
                                     int col, PREDICTION_MODE *best_mode,
                                     const int *bmode_costs, ENTROPY_CONTEXT *a,
                                     ENTROPY_CONTEXT *l, int *bestrate,
                                     int *bestratey, int64_t *bestdistortion,
                                     BLOCK_SIZE bsize, int64_t rd_thresh) {
  PREDICTION_MODE mode;
  MACROBLOCKD *const xd = &x->e_mbd;
  int64_t best_rd = rd_thresh;
  struct macroblock_plane *p = &x->plane[0];
  struct macroblockd_plane *pd = &xd->plane[0];
  const int src_stride = p->src.stride;
  const int dst_stride = pd->dst.stride;
  const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4];
  // The destination buffer must be addressed with its own stride; all other
  // accesses relative to dst_init below also use dst_stride.
  uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4];
  ENTROPY_CONTEXT ta[2], tempa[2];
  ENTROPY_CONTEXT tl[2], templ[2];
  const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
  int idx, idy;
  // Snapshot of the best reconstruction so far, stored with a row pitch of 8.
  uint8_t best_dst[8 * 8];
#if CONFIG_VP9_HIGHBITDEPTH
  uint16_t best_dst16[8 * 8];
#endif
  // Keep pristine copies of the incoming contexts; each mode starts from them.
  memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0]));
  memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0]));

  xd->mi[0]->tx_size = TX_4X4;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
      int64_t this_rd;
      int ratey = 0;
      int64_t distortion = 0;
      int rate = bmode_costs[mode];

      if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

      // Only do the oblique modes if the best so far is
      // one of the neighboring directional modes
      if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(mode, *best_mode)) continue;
      }

      memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
      memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

      for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
        for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
          const int block = (row + idy) * 2 + (col + idx);
          const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
          uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
          int16_t *const src_diff =
              vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
          tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
          xd->mi[0]->bmi[block].as_mode = mode;
          vp9_predict_intra_block(xd, 1, TX_4X4, mode,
                                  x->skip_encode ? src : dst,
                                  x->skip_encode ? src_stride : dst_stride, dst,
                                  dst_stride, col + idx, row + idy, 0);
          vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst,
                                    dst_stride, xd->bd);
          if (xd->lossless) {
            // Lossless path: Walsh-Hadamard transform; no distortion is
            // accumulated here.
            const scan_order *so = &vp9_default_scan_orders[TX_4X4];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            vp9_highbd_fwht4x4(src_diff, coeff, 8);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst,
                                   dst_stride, p->eobs[block], xd->bd);
          } else {
            int64_t unused;
            const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
            const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
            const int coeff_ctx =
                combine_entropy_contexts(tempa[idx], templ[idy]);
            if (tx_type == DCT_DCT)
              vpx_highbd_fdct4x4(src_diff, coeff, 8);
            else
              vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type);
            vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
            ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                                 so->neighbors, cpi->sf.use_fast_coef_costing);
            distortion += vp9_highbd_block_error_dispatch(
                              coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16,
                              &unused, xd->bd) >>
                          2;
            tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0);
            if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
              goto next_highbd;
            // Reconstruct so later 4x4 blocks predict from coded pixels.
            vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block),
                                  dst, dst_stride, p->eobs[block], xd->bd);
          }
        }
      }

      rate += ratey;
      this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

      if (this_rd < best_rd) {
        *bestrate = rate;
        *bestratey = ratey;
        *bestdistortion = distortion;
        best_rd = this_rd;
        *best_mode = mode;
        memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
        memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
        // Save this mode's reconstruction; later modes overwrite dst.
        for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
          memcpy(best_dst16 + idy * 8,
                 CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
                 num_4x4_blocks_wide * 4 * sizeof(uint16_t));
        }
      }
    next_highbd : {}
    }
    if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

    // Restore the winning mode's reconstruction into the destination buffer.
    for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) {
      memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride),
             best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t));
    }

    return best_rd;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int64_t this_rd;
    int ratey = 0;
    int64_t distortion = 0;
    int rate = bmode_costs[mode];

    if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue;

    // Only do the oblique modes if the best so far is
    // one of the neighboring directional modes
    if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
      if (conditional_skipintra(mode, *best_mode)) continue;
    }

    memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0]));
    memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0]));

    for (idy = 0; idy < num_4x4_blocks_high; ++idy) {
      for (idx = 0; idx < num_4x4_blocks_wide; ++idx) {
        const int block = (row + idy) * 2 + (col + idx);
        const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride];
        uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride];
        int16_t *const src_diff =
            vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff);
        tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block);
        xd->mi[0]->bmi[block].as_mode = mode;
        vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst,
                                x->skip_encode ? src_stride : dst_stride, dst,
                                dst_stride, col + idx, row + idy, 0);
        vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride);

        if (xd->lossless) {
          // Lossless path: Walsh-Hadamard transform; no distortion is
          // accumulated here.
          const scan_order *so = &vp9_default_scan_orders[TX_4X4];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fwht4x4(src_diff, coeff, 8);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride,
                          p->eobs[block]);
        } else {
          int64_t unused;
          const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block);
          const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type];
          const int coeff_ctx =
              combine_entropy_contexts(tempa[idx], templ[idy]);
          vp9_fht4x4(src_diff, coeff, 8, tx_type);
          vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan);
          ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan,
                               so->neighbors, cpi->sf.use_fast_coef_costing);
          tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0;
#if CONFIG_VP9_HIGHBITDEPTH
          distortion +=
              vp9_highbd_block_error_8bit(
                  coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >>
              2;
#else
          distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block),
                                        16, &unused) >>
                        2;
#endif
          if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd)
            goto next;
          // Reconstruct so later 4x4 blocks predict from coded pixels.
          vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst,
                         dst_stride, p->eobs[block]);
        }
      }
    }

    rate += ratey;
    this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion);

    if (this_rd < best_rd) {
      *bestrate = rate;
      *bestratey = ratey;
      *bestdistortion = distortion;
      best_rd = this_rd;
      *best_mode = mode;
      memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0]));
      memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0]));
      // Save this mode's reconstruction; later modes overwrite dst.
      for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
        memcpy(best_dst + idy * 8, dst_init + idy * dst_stride,
               num_4x4_blocks_wide * 4);
    }
  next : {}
  }

  if (best_rd >= rd_thresh || x->skip_encode) return best_rd;

  // Restore the winning mode's reconstruction into the destination buffer.
  for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy)
    memcpy(dst_init + idy * dst_stride, best_dst + idy * 8,
           num_4x4_blocks_wide * 4);

  return best_rd;
}
1176 
// Chooses an intra mode for every sub-block (4x4, 4x8 or 8x4) of an 8x8 luma
// block by calling rd_pick_intra4x4block on each one in raster order.
//
// best_rd is the RD budget for the whole 8x8 block: as soon as the
// accumulated cost of the sub-blocks reaches it, INT64_MAX is returned and
// the output pointers are left untouched. On success *rate, *rate_y and
// *distortion receive the summed values, mic->mode is set to the mode of
// sub-block 3, and the combined RD cost is returned.
static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb,
                                            int *rate, int *rate_y,
                                            int64_t *distortion,
                                            int64_t best_rd) {
  const MACROBLOCKD *const xd = &mb->e_mbd;
  MODE_INFO *const mic = xd->mi[0];
  const MODE_INFO *const above_mi = xd->above_mi;
  const MODE_INFO *const left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = mic->sb_type;
  const int blocks_wide = num_4x4_blocks_wide_lookup[bsize];
  const int blocks_high = num_4x4_blocks_high_lookup[bsize];
  const int is_key_frame = (cpi->common.frame_type == KEY_FRAME);
  const int *mode_costs = cpi->mbmode_cost;
  int rate_sum = 0;
  int rate_y_sum = 0;
  int64_t dist_sum = 0;
  int64_t rd_sum = 0;
  int blk_row, blk_col;

  // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block.
  for (blk_row = 0; blk_row < 2; blk_row += blocks_high) {
    for (blk_col = 0; blk_col < 2; blk_col += blocks_wide) {
      const int blk = blk_row * 2 + blk_col;
      const int64_t rd_budget = best_rd - rd_sum;
      PREDICTION_MODE picked_mode = DC_PRED;
      int sub_rate = INT_MAX, sub_rate_y = INT_MAX;
      int64_t sub_dist = INT64_MAX;
      int64_t sub_rd;
      int k;

      if (is_key_frame) {
        // Key frames cost the mode conditioned on the above/left block modes.
        const PREDICTION_MODE above_mode =
            vp9_above_block_mode(mic, above_mi, blk);
        const PREDICTION_MODE left_mode =
            vp9_left_block_mode(mic, left_mi, blk);

        mode_costs = cpi->y_mode_costs[above_mode][left_mode];
      }

      sub_rd = rd_pick_intra4x4block(
          cpi, mb, blk_row, blk_col, &picked_mode, mode_costs,
          xd->plane[0].above_context + blk_col,
          xd->plane[0].left_context + blk_row, &sub_rate, &sub_rate_y,
          &sub_dist, bsize, rd_budget);

      // No mode fit in the remaining budget.
      if (sub_rd >= rd_budget) return INT64_MAX;

      rd_sum += sub_rd;
      rate_sum += sub_rate;
      dist_sum += sub_dist;
      rate_y_sum += sub_rate_y;

      // Replicate the chosen mode over every 4x4 unit the sub-block covers.
      mic->bmi[blk].as_mode = picked_mode;
      for (k = 1; k < blocks_high; ++k)
        mic->bmi[blk + k * 2].as_mode = picked_mode;
      for (k = 1; k < blocks_wide; ++k)
        mic->bmi[blk + k].as_mode = picked_mode;

      if (rd_sum >= best_rd) return INT64_MAX;
    }
  }

  *rate = rate_sum;
  *rate_y = rate_y_sum;
  *distortion = dist_sum;
  mic->mode = mic->bmi[3].as_mode;

  return RDCOST(mb->rdmult, mb->rddiv, rate_sum, dist_sum);
}
1239 
1240 // This function is used only for intra_only frames
rd_pick_intra_sby_mode(VP9_COMP * cpi,MACROBLOCK * x,int * rate,int * rate_tokenonly,int64_t * distortion,int * skippable,BLOCK_SIZE bsize,int64_t best_rd)1241 static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1242                                       int *rate_tokenonly, int64_t *distortion,
1243                                       int *skippable, BLOCK_SIZE bsize,
1244                                       int64_t best_rd) {
1245   PREDICTION_MODE mode;
1246   PREDICTION_MODE mode_selected = DC_PRED;
1247   MACROBLOCKD *const xd = &x->e_mbd;
1248   MODE_INFO *const mic = xd->mi[0];
1249   int this_rate, this_rate_tokenonly, s;
1250   int64_t this_distortion, this_rd;
1251   TX_SIZE best_tx = TX_4X4;
1252   int *bmode_costs;
1253   const MODE_INFO *above_mi = xd->above_mi;
1254   const MODE_INFO *left_mi = xd->left_mi;
1255   const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0);
1256   const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0);
1257   bmode_costs = cpi->y_mode_costs[A][L];
1258 
1259   memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
1260   /* Y Search for intra prediction mode */
1261   for (mode = DC_PRED; mode <= TM_PRED; mode++) {
1262     if (cpi->sf.use_nonrd_pick_mode) {
1263       // These speed features are turned on in hybrid non-RD and RD mode
1264       // for key frame coding in the context of real-time setting.
1265       if (conditional_skipintra(mode, mode_selected)) continue;
1266       if (*skippable) break;
1267     }
1268 
1269     mic->mode = mode;
1270 
1271     super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL,
1272                     bsize, best_rd);
1273 
1274     if (this_rate_tokenonly == INT_MAX) continue;
1275 
1276     this_rate = this_rate_tokenonly + bmode_costs[mode];
1277     this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion);
1278 
1279     if (this_rd < best_rd) {
1280       mode_selected = mode;
1281       best_rd = this_rd;
1282       best_tx = mic->tx_size;
1283       *rate = this_rate;
1284       *rate_tokenonly = this_rate_tokenonly;
1285       *distortion = this_distortion;
1286       *skippable = s;
1287     }
1288   }
1289 
1290   mic->mode = mode_selected;
1291   mic->tx_size = best_tx;
1292 
1293   return best_rd;
1294 }
1295 
1296 // Return value 0: early termination triggered, no valid rd cost available;
1297 //              1: rd cost values are valid.
super_block_uvrd(const VP9_COMP * cpi,MACROBLOCK * x,int * rate,int64_t * distortion,int * skippable,int64_t * sse,BLOCK_SIZE bsize,int64_t ref_best_rd)1298 static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
1299                             int64_t *distortion, int *skippable, int64_t *sse,
1300                             BLOCK_SIZE bsize, int64_t ref_best_rd) {
1301   MACROBLOCKD *const xd = &x->e_mbd;
1302   MODE_INFO *const mi = xd->mi[0];
1303   const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
1304   int plane;
1305   int pnrate = 0, pnskip = 1;
1306   int64_t pndist = 0, pnsse = 0;
1307   int is_cost_valid = 1;
1308 
1309   if (ref_best_rd < 0) is_cost_valid = 0;
1310 
1311   if (is_inter_block(mi) && is_cost_valid) {
1312     int plane;
1313     for (plane = 1; plane < MAX_MB_PLANE; ++plane)
1314       vp9_subtract_plane(x, bsize, plane);
1315   }
1316 
1317   *rate = 0;
1318   *distortion = 0;
1319   *sse = 0;
1320   *skippable = 1;
1321 
1322   for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
1323     txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd,
1324                      plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing);
1325     if (pnrate == INT_MAX) {
1326       is_cost_valid = 0;
1327       break;
1328     }
1329     *rate += pnrate;
1330     *distortion += pndist;
1331     *sse += pnsse;
1332     *skippable &= pnskip;
1333   }
1334 
1335   if (!is_cost_valid) {
1336     // reset cost value
1337     *rate = INT_MAX;
1338     *distortion = INT64_MAX;
1339     *sse = INT64_MAX;
1340     *skippable = 0;
1341   }
1342 
1343   return is_cost_valid;
1344 }
1345 
// Searches the chroma intra modes DC_PRED..TM_PRED that are enabled in
// cpi->sf.intra_uv_mode_mask[max_tx_size] and keeps the one with the lowest
// RD cost. The winner is stored in the mode info's uv_mode (DC_PRED if no
// candidate produced a valid cost). *rate, *rate_tokenonly, *distortion and
// *skippable are written only when a candidate is accepted. Returns the best
// RD cost, or INT64_MAX if none was accepted.
static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx, int *rate,
                                       int *rate_tokenonly, int64_t *distortion,
                                       int *skippable, BLOCK_SIZE bsize,
                                       TX_SIZE max_tx_size) {
  MACROBLOCKD *xd = &x->e_mbd;
  PREDICTION_MODE winner = DC_PRED;
  int64_t best_rd = INT64_MAX;
  PREDICTION_MODE mode;

  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));

  for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
    int token_rate, total_rate, skip;
    int64_t dist, cand_sse, cand_rd;

    if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue;
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
    if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) &&
        (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode])
      continue;
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH

    xd->mi[0]->uv_mode = mode;

    // The current best cost doubles as the early-termination budget.
    if (!super_block_uvrd(cpi, x, &token_rate, &dist, &skip, &cand_sse, bsize,
                          best_rd))
      continue;

    total_rate =
        token_rate +
        cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode];
    cand_rd = RDCOST(x->rdmult, x->rddiv, total_rate, dist);
    if (cand_rd >= best_rd) continue;

    winner = mode;
    best_rd = cand_rd;
    *rate = total_rate;
    *rate_tokenonly = token_rate;
    *distortion = dist;
    *skippable = skip;
    if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, MAX_MB_PLANE);
  }

  xd->mi[0]->uv_mode = winner;
  return best_rd;
}
1391 
// Costs the chroma planes with uv_mode forced to DC_PRED (no mode search).
// *rate_tokenonly, *distortion and *skippable come from super_block_uvrd,
// which is called with an INT64_MAX budget; *rate adds the DC_PRED signalling
// cost. Returns the resulting RD cost.
static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
                              int *rate_tokenonly, int64_t *distortion,
                              int *skippable, BLOCK_SIZE bsize) {
  MODE_INFO *const mi = x->e_mbd.mi[0];
  int64_t sse_unused;

  mi->uv_mode = DC_PRED;
  memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));

  super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &sse_unused,
                   bsize, INT64_MAX);

  *rate = *rate_tokenonly +
          cpi->intra_uv_mode_cost[cpi->common.frame_type][mi->mode][DC_PRED];

  return RDCOST(x->rdmult, x->rddiv, *rate, *distortion);
}
1407 
// Determines the chroma intra mode and its cost for the current block and
// reports the chosen mode through *mode_uv. Block sizes below BLOCK_8X8 are
// evaluated as BLOCK_8X8.
static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x,
                                 PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize,
                                 TX_SIZE max_tx_size, int *rate_uv,
                                 int *rate_uv_tokenonly, int64_t *dist_uv,
                                 int *skip_uv, PREDICTION_MODE *mode_uv) {
  const BLOCK_SIZE uv_bsize = bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize;

  if (!cpi->sf.use_uv_intra_rd_estimate) {
    // Do a proper rd search for each possible transform size that may
    // be considered in the main rd loop.
    rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv,
                            skip_uv, uv_bsize, max_tx_size);
  } else {
    // Use an estimated rd for uv_intra based on DC_PRED when the
    // appropriate speed flag is set.
    rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv,
                   uv_bsize);
  }

  *mode_uv = x->e_mbd.mi[0]->uv_mode;
}
1427 
// Looks up the signalling cost of inter prediction mode `mode` in the
// encoder's inter_mode_cost table for the given mode context.
// `mode` must be an inter mode (checked by assert).
static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode,
                       int mode_context) {
  const int *const ctx_costs = cpi->inter_mode_cost[mode_context];

  assert(is_inter_mode(mode));
  return ctx_costs[INTER_OFFSET(mode)];
}
1433 
// Assigns the motion vector(s) implied by `mode` to sub-block `i` of the
// current block and returns the rate of signalling that choice.
//
//   this_mv      out: the vector(s) chosen for the mode (one per reference).
//   frame_mv     candidate vectors indexed by [mode][reference frame].
//   seg_mvs      searched NEWMV vectors indexed by reference frame.
//   best_ref_mv  reference vectors the NEWMV vectors are costed against.
//
// The selected mode and vector(s) are written to mi->bmi[i] and then
// replicated to every bmi entry at offset idy * 2 + idx for idy/idx below the
// block's height/width in 4x4 units. The return value is the mode cost plus,
// for NEWMV only, the motion vector bit cost. Modes other than
// NEWMV/NEARMV/NEARESTMV/ZEROMV leave this_mv untouched.
static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
                                int i, PREDICTION_MODE mode, int_mv this_mv[2],
                                int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
                                int_mv seg_mvs[MAX_REF_FRAMES],
                                int_mv *best_ref_mv[2], const int *mvjcost,
                                int *mvcost[2]) {
  MODE_INFO *const mi = xd->mi[0];
  const int blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type];
  const int blocks_high = num_4x4_blocks_high_lookup[mi->sb_type];
  const int num_refs = 1 + has_second_ref(mi);
  int mv_rate = 0;
  int ref, row, col;

  if (mode == NEWMV) {
    // Only searched vectors carry an explicit motion vector cost.
    for (ref = 0; ref < num_refs; ++ref) {
      this_mv[ref].as_int = seg_mvs[mi->ref_frame[ref]].as_int;
      mv_rate += vp9_mv_bit_cost(&this_mv[ref].as_mv, &best_ref_mv[ref]->as_mv,
                                 mvjcost, mvcost, MV_COST_WEIGHT_SUB);
    }
  } else if (mode == NEARMV || mode == NEARESTMV) {
    for (ref = 0; ref < num_refs; ++ref)
      this_mv[ref].as_int = frame_mv[mode][mi->ref_frame[ref]].as_int;
  } else if (mode == ZEROMV) {
    for (ref = 0; ref < num_refs; ++ref) this_mv[ref].as_int = 0;
  }

  for (ref = 0; ref < num_refs; ++ref)
    mi->bmi[i].as_mv[ref].as_int = this_mv[ref].as_int;
  mi->bmi[i].as_mode = mode;

  // Replicate the entry over the 4x4 units covered by this sub-block. The
  // first iteration copies bmi[i] onto itself, hence memmove.
  for (row = 0; row < blocks_high; ++row) {
    for (col = 0; col < blocks_wide; ++col) {
      memmove(&mi->bmi[i + row * 2 + col], &mi->bmi[i], sizeof(mi->bmi[i]));
    }
  }

  return cost_mv_ref(cpi, mode, x->mbmi_ext->mode_context[mi->ref_frame[0]]) +
         mv_rate;
}
1484 
// Builds the luma inter prediction for sub-8x8 label `i`, then transforms,
// quantizes and costs its residual one 4x4 block at a time.
//
//   best_yrd    RD budget; as soon as the running RD cost of this label
//               reaches it the function gives up and returns INT64_MAX.
//   labelyrate  out: coefficient rate of the label.
//   distortion  out: accumulated block error, scaled down by 4 (>> 2).
//   sse         out: accumulated SSE, scaled down by 4 (>> 2).
//   ta, tl      in/out: two-entry above/left entropy contexts; updated for
//               every 4x4 block that is coded, including on early exit.
//
// Returns the RD cost of the label, or INT64_MAX on early termination (in
// which case the three output values are not written).
static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x,
                                       int64_t best_yrd, int i, int *labelyrate,
                                       int64_t *distortion, int64_t *sse,
                                       ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl,
                                       int mi_row, int mi_col) {
  MACROBLOCKD *xd = &x->e_mbd;
  struct macroblockd_plane *const pd = &xd->plane[0];
  struct macroblock_plane *const p = &x->plane[0];
  MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd);
  const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize];
  int idx, idy;

  const uint8_t *const src =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  uint8_t *const dst =
      &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)];
  int64_t thisdistortion = 0, thissse = 0;
  int thisrate = 0, ref;
  const scan_order *so = &vp9_default_scan_orders[TX_4X4];
  const int is_compound = has_second_ref(mi);
  const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter];

  // Build the prediction for this label from each reference in turn; the
  // `ref` argument is forwarded to the predictor for every pass.
  for (ref = 0; ref < 1 + is_compound; ++ref) {
    const int bw = b_width_log2_lookup[BLOCK_8X8];
    // Pixel offset of label i inside the 8x8 block.
    const int h = 4 * (i >> bw);
    const int w = 4 * (i & ((1 << bw) - 1));
    const struct scale_factors *sf = &xd->block_refs[ref]->sf;
    int y_stride = pd->pre[ref].stride;
    uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w);

    if (vp9_is_scaled(sf)) {
      // For a scaled reference, address the reference frame buffer directly
      // using the block position derived from the frame edges.
      const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x));
      const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y));

      y_stride = xd->block_refs[ref]->buf->y_stride;
      pre = xd->block_refs[ref]->buf->y_buffer;
      pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf);
    }
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_build_inter_predictor(
          pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, sf,
          width, height, ref, kernel, MV_PRECISION_Q3,
          mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2),
          xd->bd);
    } else {
      vp9_build_inter_predictor(
          pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, sf,
          width, height, ref, kernel, MV_PRECISION_Q3,
          mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
    }
#else
    vp9_build_inter_predictor(
        pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, sf,
        width, height, ref, kernel, MV_PRECISION_Q3,
        mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // Residual = source - prediction, written into the 8-wide src_diff block.
#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    vpx_highbd_subtract_block(
        height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
        8, src, p->src.stride, dst, pd->dst.stride, xd->bd);
  } else {
    vpx_subtract_block(height, width,
                       vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                       8, src, p->src.stride, dst, pd->dst.stride);
  }
#else
  vpx_subtract_block(height, width,
                     vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff),
                     8, src, p->src.stride, dst, pd->dst.stride);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (idy = 0; idy < height / 4; ++idy) {
    for (idx = 0; idx < width / 4; ++idx) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
#endif
      // Raster index of this 4x4 block inside the 8x8 block. It is derived
      // directly from (idy, idx) rather than accumulated across iterations:
      // a running sum only matches when at most one of the two loops iterates
      // more than once, and would otherwise run past the four 4x4 positions.
      const int k = i + idy * 2 + idx;
      int64_t ssz, rd, rd1, rd2;
      tran_low_t *coeff;
      int coeff_ctx;

      coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                    coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      thisdistortion += vp9_highbd_block_error_dispatch(
          coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
#else
      thisdistortion +=
          vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan,
                              so->neighbors, cpi->sf.use_fast_coef_costing);
      // Record whether this 4x4 block has any non-zero coefficient.
      ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0;
      // Compare the cheaper of "code the coefficients" and "zero rate with
      // the full SSE as distortion" against the remaining budget.
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd) return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;

  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
1602 
1603 typedef struct {
1604   int eobs;
1605   int brate;
1606   int byrate;
1607   int64_t bdist;
1608   int64_t bsse;
1609   int64_t brdcost;
1610   int_mv mvs[2];
1611   ENTROPY_CONTEXT ta[2];
1612   ENTROPY_CONTEXT tl[2];
1613 } SEG_RDSTAT;
1614 
1615 typedef struct {
1616   int_mv *ref_mv[2];
1617   int_mv mvp;
1618 
1619   int64_t segment_rd;
1620   int r;
1621   int64_t d;
1622   int64_t sse;
1623   int segment_yrate;
1624   PREDICTION_MODE modes[4];
1625   SEG_RDSTAT rdstat[4][INTER_MODES];
1626   int mvthresh;
1627 } BEST_SEG_INFO;
1628 
mv_check_bounds(const MvLimits * mv_limits,const MV * mv)1629 static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
1630   return (mv->row >> 3) < mv_limits->row_min ||
1631          (mv->row >> 3) > mv_limits->row_max ||
1632          (mv->col >> 3) < mv_limits->col_min ||
1633          (mv->col >> 3) > mv_limits->col_max;
1634 }
1635 
mi_buf_shift(MACROBLOCK * x,int i)1636 static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
1637   MODE_INFO *const mi = x->e_mbd.mi[0];
1638   struct macroblock_plane *const p = &x->plane[0];
1639   struct macroblockd_plane *const pd = &x->e_mbd.plane[0];
1640 
1641   p->src.buf =
1642       &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
1643   assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
1644   pd->pre[0].buf =
1645       &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
1646   if (has_second_ref(mi))
1647     pd->pre[1].buf =
1648         &pd->pre[1]
1649              .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
1650 }
1651 
mi_buf_restore(MACROBLOCK * x,struct buf_2d orig_src,struct buf_2d orig_pre[2])1652 static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
1653                                   struct buf_2d orig_pre[2]) {
1654   MODE_INFO *mi = x->e_mbd.mi[0];
1655   x->plane[0].src = orig_src;
1656   x->e_mbd.plane[0].pre[0] = orig_pre[0];
1657   if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
1658 }
1659 
mv_has_subpel(const MV * mv)1660 static INLINE int mv_has_subpel(const MV *mv) {
1661   return (mv->row & 0x0F) || (mv->col & 0x0F);
1662 }
1663 
1664 // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion.
1665 // TODO(aconverse): Find out if this is still productive then clean up or remove
check_best_zero_mv(const VP9_COMP * cpi,const uint8_t mode_context[MAX_REF_FRAMES],int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],int this_mode,const MV_REFERENCE_FRAME ref_frames[2])1666 static int check_best_zero_mv(const VP9_COMP *cpi,
1667                               const uint8_t mode_context[MAX_REF_FRAMES],
1668                               int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES],
1669                               int this_mode,
1670                               const MV_REFERENCE_FRAME ref_frames[2]) {
1671   if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) &&
1672       frame_mv[this_mode][ref_frames[0]].as_int == 0 &&
1673       (ref_frames[1] == NONE ||
1674        frame_mv[this_mode][ref_frames[1]].as_int == 0)) {
1675     int rfc = mode_context[ref_frames[0]];
1676     int c1 = cost_mv_ref(cpi, NEARMV, rfc);
1677     int c2 = cost_mv_ref(cpi, NEARESTMV, rfc);
1678     int c3 = cost_mv_ref(cpi, ZEROMV, rfc);
1679 
1680     if (this_mode == NEARMV) {
1681       if (c1 > c3) return 0;
1682     } else if (this_mode == NEARESTMV) {
1683       if (c2 > c3) return 0;
1684     } else {
1685       assert(this_mode == ZEROMV);
1686       if (ref_frames[1] == NONE) {
1687         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) ||
1688             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0))
1689           return 0;
1690       } else {
1691         if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 &&
1692              frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) ||
1693             (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 &&
1694              frame_mv[NEARMV][ref_frames[1]].as_int == 0))
1695           return 0;
1696       }
1697     }
1698   }
1699   return 1;
1700 }
1701 
// Joint motion search for compound prediction: starting from the single
// reference results in single_newmv, alternately refines the motion vector of
// one reference while the prediction from the other reference is held fixed.
//
//   frame_mv      in/out: vectors indexed by reference frame; seeded from
//                 single_newmv and updated whenever an iteration improves.
//   single_newmv  vectors found by the single-reference searches.
//   rate_mv       out: summed bit cost of the two final vectors.
//
// At most four iterations are run; the search stops at the first iteration
// that fails to lower the error previously recorded for its reference.
static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                int_mv *frame_mv, int mi_row, int mi_col,
                                int_mv single_newmv[MAX_REF_FRAMES],
                                int *rate_mv) {
  const VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const int block_w = 4 * num_4x4_blocks_wide_lookup[bsize];
  const int block_h = 4 * num_4x4_blocks_high_lookup[bsize];
  const int refs[2] = { mi->ref_frame[0],
                        mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] };
  const InterpKernel *const kernel = vp9_filter_kernels[mi->interp_filter];
  const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = {
    vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]),
    vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1])
  };
  struct buf_2d backup_yv12[2][MAX_MB_PLANE];
  struct scale_factors sf;
  uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX };
  int_mv ref_mv[2];
  int total_mv_rate = 0;
  int iter, ref, plane;

  // Prediction buffer from second frame.
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]);
  uint8_t *second_pred;
#else
  DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  for (ref = 0; ref < 2; ++ref) {
    const int frame = refs[ref];

    ref_mv[ref] = x->mbmi_ext->ref_mvs[frame][0];

    if (scaled_ref_frame[ref] != NULL) {
      // Swap out the reference frame for a version that's been scaled to
      // match the resolution of the current frame, allowing the existing
      // motion search code to be used without additional modifications.
      for (plane = 0; plane < MAX_MB_PLANE; ++plane)
        backup_yv12[ref][plane] = xd->plane[plane].pre[ref];
      vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
                           NULL);
    }

    frame_mv[frame].as_int = single_newmv[frame].as_int;
  }

  // Since we have scaled the reference frames to match the size of the
  // current frame we must use a unit scaling factor during mode selection.
#if CONFIG_VP9_HIGHBITDEPTH
  vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height, cm->use_highbitdepth);
#else
  vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width,
                                    cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Allow joint search multiple times iteratively for each reference frame
  // and break out of the search loop if it couldn't find a better mv.
  for (iter = 0; iter < 4; ++iter) {
    // Even iterations search in the first reference frame, odd iterations
    // search in the second. The predictor found for the 'other' reference
    // frame is factored in.
    const int cur = iter % 2;
    const int other = !cur;
    const MvLimits saved_mv_limits = x->mv_limits;
    struct buf_2d ref_yv12[2];
    uint32_t bestsme;
    MV tmp_mv;

    // Initialized here because of compiler problem in Visual Studio.
    ref_yv12[0] = xd->plane[0].pre[0];
    ref_yv12[1] = xd->plane[0].pre[1];

    // Get the prediction block from the 'other' reference frame.
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
      second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16);
      vp9_highbd_build_inter_predictor(
          ref_yv12[other].buf, ref_yv12[other].stride, second_pred, block_w,
          &frame_mv[refs[other]].as_mv, &sf, block_w, block_h, 0, kernel,
          MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd);
    } else {
      second_pred = (uint8_t *)second_pred_alloc_16;
      vp9_build_inter_predictor(
          ref_yv12[other].buf, ref_yv12[other].stride, second_pred, block_w,
          &frame_mv[refs[other]].as_mv, &sf, block_w, block_h, 0, kernel,
          MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
    }
#else
    vp9_build_inter_predictor(
        ref_yv12[other].buf, ref_yv12[other].stride, second_pred, block_w,
        &frame_mv[refs[other]].as_mv, &sf, block_w, block_h, 0, kernel,
        MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
#endif  // CONFIG_VP9_HIGHBITDEPTH

    // Do compound motion search on the current reference frame. The search
    // reads pre[0], so the second reference is temporarily placed there.
    if (cur) xd->plane[0].pre[0] = ref_yv12[cur];
    vp9_set_mv_search_range(&x->mv_limits, &ref_mv[cur].as_mv);

    // Use the mv result from the single mode as mv predictor, reduced to
    // full-pixel units.
    tmp_mv = frame_mv[refs[cur]].as_mv;
    tmp_mv.col >>= 3;
    tmp_mv.row >>= 3;

    // Small-range (3) full-pixel motion search.
    bestsme = vp9_refining_search_8p_c(x, &tmp_mv, x->sadperbit16, 3,
                                       &cpi->fn_ptr[bsize], &ref_mv[cur].as_mv,
                                       second_pred);
    if (bestsme < UINT_MAX) {
      bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[cur].as_mv,
                                      second_pred, &cpi->fn_ptr[bsize], 1);
    }

    x->mv_limits = saved_mv_limits;

    if (bestsme < UINT_MAX) {
      uint32_t dis; /* TODO: use dis in distortion calculation later. */
      uint32_t sse;
      bestsme = cpi->find_fractional_mv_step(
          x, &tmp_mv, &ref_mv[cur].as_mv, cm->allow_high_precision_mv,
          x->errorperbit, &cpi->fn_ptr[bsize], 0,
          cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
          &dis, &sse, second_pred, block_w, block_h);
    }

    // Restore the pointer to the first (possibly scaled) prediction buffer.
    if (cur) xd->plane[0].pre[0] = ref_yv12[0];

    // Stop as soon as an iteration fails to improve on its reference.
    if (bestsme >= last_besterr[cur]) break;

    frame_mv[refs[cur]].as_mv = tmp_mv;
    last_besterr[cur] = bestsme;
  }

  for (ref = 0; ref < 2; ++ref) {
    if (scaled_ref_frame[ref] != NULL) {
      // Restore the prediction frame pointers to their unscaled versions.
      for (plane = 0; plane < MAX_MB_PLANE; ++plane)
        xd->plane[plane].pre[ref] = backup_yv12[ref][plane];
    }

    total_mv_rate += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv,
                                     &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv,
                                     x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
  }

  *rate_mv = total_mv_rate;
}
1857 
rd_pick_best_sub8x8_mode(VP9_COMP * cpi,MACROBLOCK * x,int_mv * best_ref_mv,int_mv * second_best_ref_mv,int64_t best_rd,int * returntotrate,int * returnyrate,int64_t * returndistortion,int * skippable,int64_t * psse,int mvthresh,int_mv seg_mvs[4][MAX_REF_FRAMES],BEST_SEG_INFO * bsi_buf,int filter_idx,int mi_row,int mi_col)1858 static int64_t rd_pick_best_sub8x8_mode(
1859     VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv,
1860     int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate,
1861     int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse,
1862     int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf,
1863     int filter_idx, int mi_row, int mi_col) {
1864   int i;
1865   BEST_SEG_INFO *bsi = bsi_buf + filter_idx;
1866   MACROBLOCKD *xd = &x->e_mbd;
1867   MODE_INFO *mi = xd->mi[0];
1868   int mode_idx;
1869   int k, br = 0, idx, idy;
1870   int64_t bd = 0, block_sse = 0;
1871   PREDICTION_MODE this_mode;
1872   VP9_COMMON *cm = &cpi->common;
1873   struct macroblock_plane *const p = &x->plane[0];
1874   struct macroblockd_plane *const pd = &xd->plane[0];
1875   const int label_count = 4;
1876   int64_t this_segment_rd = 0;
1877   int label_mv_thresh;
1878   int segmentyrate = 0;
1879   const BLOCK_SIZE bsize = mi->sb_type;
1880   const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
1881   const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
1882   ENTROPY_CONTEXT t_above[2], t_left[2];
1883   int subpelmv = 1, have_ref = 0;
1884   SPEED_FEATURES *const sf = &cpi->sf;
1885   const int has_second_rf = has_second_ref(mi);
1886   const int inter_mode_mask = sf->inter_mode_mask[bsize];
1887   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
1888 
1889   vp9_zero(*bsi);
1890 
1891   bsi->segment_rd = best_rd;
1892   bsi->ref_mv[0] = best_ref_mv;
1893   bsi->ref_mv[1] = second_best_ref_mv;
1894   bsi->mvp.as_int = best_ref_mv->as_int;
1895   bsi->mvthresh = mvthresh;
1896 
1897   for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV;
1898 
1899   memcpy(t_above, pd->above_context, sizeof(t_above));
1900   memcpy(t_left, pd->left_context, sizeof(t_left));
1901 
1902   // 64 makes this threshold really big effectively
1903   // making it so that we very rarely check mvs on
1904   // segments.   setting this to 1 would make mv thresh
1905   // roughly equal to what it is for macroblocks
1906   label_mv_thresh = 1 * bsi->mvthresh / label_count;
1907 
1908   // Segmentation method overheads
1909   for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
1910     for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
1911       // TODO(jingning,rbultje): rewrite the rate-distortion optimization
1912       // loop for 4x4/4x8/8x4 block coding. to be replaced with new rd loop
1913       int_mv mode_mv[MB_MODE_COUNT][2];
1914       int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
1915       PREDICTION_MODE mode_selected = ZEROMV;
1916       int64_t best_rd = INT64_MAX;
1917       const int i = idy * 2 + idx;
1918       int ref;
1919 
1920       for (ref = 0; ref < 1 + has_second_rf; ++ref) {
1921         const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
1922         frame_mv[ZEROMV][frame].as_int = 0;
1923         vp9_append_sub8x8_mvs_for_idx(
1924             cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
1925             &frame_mv[NEARMV][frame], mbmi_ext->mode_context);
1926       }
1927 
1928       // search for the best motion vector on this segment
1929       for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
1930         const struct buf_2d orig_src = x->plane[0].src;
1931         struct buf_2d orig_pre[2];
1932 
1933         mode_idx = INTER_OFFSET(this_mode);
1934         bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
1935         if (!(inter_mode_mask & (1 << this_mode))) continue;
1936 
1937         if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
1938                                 this_mode, mi->ref_frame))
1939           continue;
1940 
1941         memcpy(orig_pre, pd->pre, sizeof(orig_pre));
1942         memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
1943                sizeof(bsi->rdstat[i][mode_idx].ta));
1944         memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
1945                sizeof(bsi->rdstat[i][mode_idx].tl));
1946 
1947         // motion search for newmv (single predictor case only)
1948         if (!has_second_rf && this_mode == NEWMV &&
1949             seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) {
1950           MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
1951           int step_param = 0;
1952           uint32_t bestsme = UINT_MAX;
1953           int sadpb = x->sadperbit4;
1954           MV mvp_full;
1955           int max_mv;
1956           int cost_list[5];
1957           const MvLimits tmp_mv_limits = x->mv_limits;
1958 
1959           /* Is the best so far sufficiently good that we cant justify doing
1960            * and new motion search. */
1961           if (best_rd < label_mv_thresh) break;
1962 
1963           if (cpi->oxcf.mode != BEST) {
1964             // use previous block's result as next block's MV predictor.
1965             if (i > 0) {
1966               bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
1967               if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
1968             }
1969           }
1970           if (i == 0)
1971             max_mv = x->max_mv_context[mi->ref_frame[0]];
1972           else
1973             max_mv =
1974                 VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;
1975 
1976           if (sf->mv.auto_mv_step_size && cm->show_frame) {
1977             // Take wtd average of the step_params based on the last frame's
1978             // max mv magnitude and the best ref mvs of the current block for
1979             // the given reference.
1980             step_param =
1981                 (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2;
1982           } else {
1983             step_param = cpi->mv_step_param;
1984           }
1985 
1986           mvp_full.row = bsi->mvp.as_mv.row >> 3;
1987           mvp_full.col = bsi->mvp.as_mv.col >> 3;
1988 
1989           if (sf->adaptive_motion_search) {
1990             mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
1991             mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
1992             step_param = VPXMAX(step_param, 8);
1993           }
1994 
1995           // adjust src pointer for this block
1996           mi_buf_shift(x, i);
1997 
1998           vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);
1999 
2000           bestsme = vp9_full_pixel_search(
2001               cpi, x, bsize, &mvp_full, step_param, sadpb,
2002               sf->mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL,
2003               &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1);
2004 
2005           x->mv_limits = tmp_mv_limits;
2006 
2007           if (bestsme < UINT_MAX) {
2008             uint32_t distortion;
2009             cpi->find_fractional_mv_step(
2010                 x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
2011                 x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
2012                 sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
2013                 x->nmvjointcost, x->mvcost, &distortion,
2014                 &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);
2015 
2016             // save motion search result for use in compound prediction
2017             seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
2018           }
2019 
2020           if (sf->adaptive_motion_search)
2021             x->pred_mv[mi->ref_frame[0]] = *new_mv;
2022 
2023           // restore src pointers
2024           mi_buf_restore(x, orig_src, orig_pre);
2025         }
2026 
2027         if (has_second_rf) {
2028           if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV ||
2029               seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV)
2030             continue;
2031         }
2032 
2033         if (has_second_rf && this_mode == NEWMV &&
2034             mi->interp_filter == EIGHTTAP) {
2035           // adjust src pointers
2036           mi_buf_shift(x, i);
2037           if (sf->comp_inter_joint_search_thresh <= bsize) {
2038             int rate_mv;
2039             joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row,
2040                                 mi_col, seg_mvs[i], &rate_mv);
2041             seg_mvs[i][mi->ref_frame[0]].as_int =
2042                 frame_mv[this_mode][mi->ref_frame[0]].as_int;
2043             seg_mvs[i][mi->ref_frame[1]].as_int =
2044                 frame_mv[this_mode][mi->ref_frame[1]].as_int;
2045           }
2046           // restore src pointers
2047           mi_buf_restore(x, orig_src, orig_pre);
2048         }
2049 
2050         bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs(
2051             cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv, seg_mvs[i],
2052             bsi->ref_mv, x->nmvjointcost, x->mvcost);
2053 
2054         for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2055           bsi->rdstat[i][mode_idx].mvs[ref].as_int =
2056               mode_mv[this_mode][ref].as_int;
2057           if (num_4x4_blocks_wide > 1)
2058             bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int =
2059                 mode_mv[this_mode][ref].as_int;
2060           if (num_4x4_blocks_high > 1)
2061             bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int =
2062                 mode_mv[this_mode][ref].as_int;
2063         }
2064 
2065         // Trap vectors that reach beyond the UMV borders
2066         if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) ||
2067             (has_second_rf &&
2068              mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv)))
2069           continue;
2070 
2071         if (filter_idx > 0) {
2072           BEST_SEG_INFO *ref_bsi = bsi_buf;
2073           subpelmv = 0;
2074           have_ref = 1;
2075 
2076           for (ref = 0; ref < 1 + has_second_rf; ++ref) {
2077             subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv);
2078             have_ref &= mode_mv[this_mode][ref].as_int ==
2079                         ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2080           }
2081 
2082           if (filter_idx > 1 && !subpelmv && !have_ref) {
2083             ref_bsi = bsi_buf + 1;
2084             have_ref = 1;
2085             for (ref = 0; ref < 1 + has_second_rf; ++ref)
2086               have_ref &= mode_mv[this_mode][ref].as_int ==
2087                           ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int;
2088           }
2089 
2090           if (!subpelmv && have_ref &&
2091               ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2092             memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx],
2093                    sizeof(SEG_RDSTAT));
2094             if (num_4x4_blocks_wide > 1)
2095               bsi->rdstat[i + 1][mode_idx].eobs =
2096                   ref_bsi->rdstat[i + 1][mode_idx].eobs;
2097             if (num_4x4_blocks_high > 1)
2098               bsi->rdstat[i + 2][mode_idx].eobs =
2099                   ref_bsi->rdstat[i + 2][mode_idx].eobs;
2100 
2101             if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2102               mode_selected = this_mode;
2103               best_rd = bsi->rdstat[i][mode_idx].brdcost;
2104             }
2105             continue;
2106           }
2107         }
2108 
2109         bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment(
2110             cpi, x, bsi->segment_rd - this_segment_rd, i,
2111             &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist,
2112             &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta,
2113             bsi->rdstat[i][mode_idx].tl, mi_row, mi_col);
2114         if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) {
2115           bsi->rdstat[i][mode_idx].brdcost +=
2116               RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0);
2117           bsi->rdstat[i][mode_idx].brate += bsi->rdstat[i][mode_idx].byrate;
2118           bsi->rdstat[i][mode_idx].eobs = p->eobs[i];
2119           if (num_4x4_blocks_wide > 1)
2120             bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1];
2121           if (num_4x4_blocks_high > 1)
2122             bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2];
2123         }
2124 
2125         if (bsi->rdstat[i][mode_idx].brdcost < best_rd) {
2126           mode_selected = this_mode;
2127           best_rd = bsi->rdstat[i][mode_idx].brdcost;
2128         }
2129       } /*for each 4x4 mode*/
2130 
2131       if (best_rd == INT64_MAX) {
2132         int iy, midx;
2133         for (iy = i + 1; iy < 4; ++iy)
2134           for (midx = 0; midx < INTER_MODES; ++midx)
2135             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2136         bsi->segment_rd = INT64_MAX;
2137         return INT64_MAX;
2138       }
2139 
2140       mode_idx = INTER_OFFSET(mode_selected);
2141       memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above));
2142       memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left));
2143 
2144       set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected],
2145                            frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost,
2146                            x->mvcost);
2147 
2148       br += bsi->rdstat[i][mode_idx].brate;
2149       bd += bsi->rdstat[i][mode_idx].bdist;
2150       block_sse += bsi->rdstat[i][mode_idx].bsse;
2151       segmentyrate += bsi->rdstat[i][mode_idx].byrate;
2152       this_segment_rd += bsi->rdstat[i][mode_idx].brdcost;
2153 
2154       if (this_segment_rd > bsi->segment_rd) {
2155         int iy, midx;
2156         for (iy = i + 1; iy < 4; ++iy)
2157           for (midx = 0; midx < INTER_MODES; ++midx)
2158             bsi->rdstat[iy][midx].brdcost = INT64_MAX;
2159         bsi->segment_rd = INT64_MAX;
2160         return INT64_MAX;
2161       }
2162     }
2163   } /* for each label */
2164 
2165   bsi->r = br;
2166   bsi->d = bd;
2167   bsi->segment_yrate = segmentyrate;
2168   bsi->segment_rd = this_segment_rd;
2169   bsi->sse = block_sse;
2170 
2171   // update the coding decisions
2172   for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode;
2173 
2174   if (bsi->segment_rd > best_rd) return INT64_MAX;
2175   /* set it to the best */
2176   for (i = 0; i < 4; i++) {
2177     mode_idx = INTER_OFFSET(bsi->modes[i]);
2178     mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int;
2179     if (has_second_ref(mi))
2180       mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int;
2181     x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs;
2182     mi->bmi[i].as_mode = bsi->modes[i];
2183   }
2184 
2185   /*
2186    * used to set mbmi->mv.as_int
2187    */
2188   *returntotrate = bsi->r;
2189   *returndistortion = bsi->d;
2190   *returnyrate = bsi->segment_yrate;
2191   *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0);
2192   *psse = bsi->sse;
2193   mi->mode = bsi->modes[3];
2194 
2195   return bsi->segment_rd;
2196 }
2197 
// Estimates the cost of signalling each reference frame choice for the
// current block.
//
// ref_costs_single[] is indexed by reference frame and receives the cost of
// choosing INTRA_FRAME, LAST_FRAME, GOLDEN_FRAME or ALTREF_FRAME as a single
// reference. ref_costs_comp[] receives the cost of the compound choices
// indexed by LAST_FRAME and GOLDEN_FRAME. *comp_mode_p receives the
// probability returned by vp9_get_reference_mode_prob() when the frame uses
// REFERENCE_MODE_SELECT, and 128 otherwise.
//
// When the segment has the SEG_LVL_REF_FRAME feature active, both cost arrays
// are zeroed over MAX_REF_FRAMES entries and *comp_mode_p is set to 128.
static void estimate_ref_frame_costs(const VP9_COMMON *cm,
                                     const MACROBLOCKD *xd, int segment_id,
                                     unsigned int *ref_costs_single,
                                     unsigned int *ref_costs_comp,
                                     vpx_prob *comp_mode_p) {
  vpx_prob intra_inter_p;
  vpx_prob comp_inter_p = 128;
  unsigned int inter_cost;

  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
    memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single));
    memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp));
    *comp_mode_p = 128;
    return;
  }

  intra_inter_p = vp9_get_intra_inter_prob(cm, xd);
  if (cm->reference_mode == REFERENCE_MODE_SELECT)
    comp_inter_p = vp9_get_reference_mode_prob(cm, xd);
  *comp_mode_p = comp_inter_p;

  ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0);

  // Every inter choice starts with the cost of the "is inter" bit.
  inter_cost = vp9_cost_bit(intra_inter_p, 1);

  if (cm->reference_mode == COMPOUND_REFERENCE) {
    // Single references are not available: use a fixed placeholder cost.
    ref_costs_single[LAST_FRAME] = 512;
    ref_costs_single[GOLDEN_FRAME] = 512;
    ref_costs_single[ALTREF_FRAME] = 512;
  } else {
    const vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd);
    const vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd);
    unsigned int single_cost = inter_cost;
    unsigned int not_last_cost;

    if (cm->reference_mode == REFERENCE_MODE_SELECT)
      single_cost += vp9_cost_bit(comp_inter_p, 0);

    // The first bit separates LAST from {GOLDEN, ALTREF}; the second bit
    // separates GOLDEN from ALTREF.
    not_last_cost = single_cost + vp9_cost_bit(ref_single_p1, 1);
    ref_costs_single[LAST_FRAME] =
        single_cost + vp9_cost_bit(ref_single_p1, 0);
    ref_costs_single[GOLDEN_FRAME] =
        not_last_cost + vp9_cost_bit(ref_single_p2, 0);
    ref_costs_single[ALTREF_FRAME] =
        not_last_cost + vp9_cost_bit(ref_single_p2, 1);
  }

  if (cm->reference_mode == SINGLE_REFERENCE) {
    // Compound references are not available: use a fixed placeholder cost.
    ref_costs_comp[LAST_FRAME] = 512;
    ref_costs_comp[GOLDEN_FRAME] = 512;
  } else {
    const vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd);
    unsigned int comp_cost = inter_cost;

    if (cm->reference_mode == REFERENCE_MODE_SELECT)
      comp_cost += vp9_cost_bit(comp_inter_p, 1);

    ref_costs_comp[LAST_FRAME] = comp_cost + vp9_cost_bit(ref_comp_p, 0);
    ref_costs_comp[GOLDEN_FRAME] = comp_cost + vp9_cost_bit(ref_comp_p, 1);
  }
}
2257 
// Records the current coding decision in ctx so that it can be restored later
// if this way of encoding the block is chosen.
//
// Copies the skip flags, the winning mode index, the block's mode info
// (xd->mi[0]) and its extended mode info, the three reference-mode rd
// differences (truncated to int) and the per-filter-context rd differences.
static void store_coding_context(
    MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
    int64_t comp_pred_diff[REFERENCE_MODES],
    int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) {
  const MACROBLOCKD *const xd = &x->e_mbd;

  // Mode decision and its side information.
  ctx->best_mode_index = mode_index;
  ctx->skippable = skippable;
  ctx->skip = x->skip;
  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;

  // Rd differences between the reference modes.
  ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
  ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
  ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];

  // Rd differences between the interpolation filter contexts.
  memcpy(ctx->best_filter_diff, best_filter_diff,
         SWITCHABLE_FILTER_CONTEXTS * sizeof(best_filter_diff[0]));
}
2278 
// Prepares everything needed to evaluate inter modes against ref_frame:
//  - points yv12_mb[ref_frame] at the reference buffer for this block,
//  - builds the candidate mv list in x->mbmi_ext->ref_mvs[ref_frame],
//  - derives the nearest / near vectors into frame_nearest_mv[ref_frame] and
//    frame_near_mv[ref_frame],
//  - for unscaled references and blocks of at least 8x8, calls vp9_mv_pred()
//    on the candidates.
static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
                               MV_REFERENCE_FRAME ref_frame,
                               BLOCK_SIZE block_size, int mi_row, int mi_col,
                               int_mv frame_nearest_mv[MAX_REF_FRAMES],
                               int_mv frame_near_mv[MAX_REF_FRAMES],
                               struct buf_2d yv12_mb[4][MAX_MB_PLANE]) {
  const VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref_frame);
  const struct scale_factors *const scale = &cm->frame_refs[ref_frame - 1].sf;
  struct buf_2d *const ref_planes = yv12_mb[ref_frame];
  int_mv *const mv_candidates = mbmi_ext->ref_mvs[ref_frame];

  assert(yv12 != NULL);

  // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this
  // use the UV scaling factors.
  vp9_setup_pred_block(xd, ref_planes, yv12, mi_row, mi_col, scale, scale);

  // Collect and order the initial candidate vectors from the neighbours.
  vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, mv_candidates, mi_row, mi_col,
                   mbmi_ext->mode_context);

  // Candidate refinement that both encoder and decoder perform.
  vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, mv_candidates,
                        &frame_nearest_mv[ref_frame],
                        &frame_near_mv[ref_frame]);

  // The remaining step is encoder-only: test the top few candidates in full
  // and pick the best as the centre point for subsequent searches. The
  // current implementation doesn't support scaling.
  if (vp9_is_scaled(scale) || block_size < BLOCK_8X8) return;

  vp9_mv_pred(cpi, x, ref_planes[0].buf, yv12->y_stride, ref_frame,
              block_size);
}
2315 
// Runs a motion search for the block's first reference frame
// (mi->ref_frame[0]): a full-pixel search seeded from one of the predicted
// vectors, followed by fractional refinement when the full-pixel search
// produced a result.
//
// On return *tmp_mv holds the selected vector and *rate_mv its bit cost
// relative to the first candidate reference mv. When the adaptive-search
// early exit triggers, *tmp_mv is set to INVALID_MV, *rate_mv is left
// unwritten and the function returns immediately.
//
// x->mv_limits is modified during the full-pixel search and restored
// afterwards. If a scaled reference is in use, xd->plane[].pre[0] is
// temporarily swapped and restored on every return path.
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
                                 int mi_row, int mi_col, int_mv *tmp_mv,
                                 int *rate_mv) {
  MACROBLOCKD *xd = &x->e_mbd;
  const VP9_COMMON *cm = &cpi->common;
  MODE_INFO *mi = xd->mi[0];
  struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
  int bestsme = INT_MAX;
  int step_param;
  int sadpb = x->sadperbit16;
  MV mvp_full;
  int ref = mi->ref_frame[0];
  MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  const MvLimits tmp_mv_limits = x->mv_limits;
  int cost_list[5];
  int plane;

  const YV12_BUFFER_CONFIG *scaled_ref_frame =
      vp9_get_scaled_ref_frame(cpi, ref);

  MV pred_mv[3];
  pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
  pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
  pred_mv[2] = x->pred_mv[ref];

  if (scaled_ref_frame) {
    // Swap out the reference frame for a version that's been scaled to
    // match the resolution of the current frame, allowing the existing
    // motion search code to be used without additional modifications.
    for (plane = 0; plane < MAX_MB_PLANE; plane++)
      backup_yv12[plane] = xd->plane[plane].pre[0];

    vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
  }

  // Work out the size of the first step in the mv step search.
  // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc.
  if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) {
    // Take wtd average of the step_params based on the last frame's
    // max mv magnitude and that based on the best ref mvs of the current
    // block for the given reference.
    step_param =
        (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) /
        2;
  } else {
    step_param = cpi->mv_step_param;
  }

  if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
    int boffset =
        2 * (b_width_log2_lookup[BLOCK_64X64] -
             VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
    step_param = VPXMAX(step_param, boffset);
  }

  if (cpi->sf.adaptive_motion_search) {
    int bwl = b_width_log2_lookup[bsize];
    int bhl = b_height_log2_lookup[bsize];
    int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4);

    if (tlevel < 5) {
      step_param += 2;
      // The increment can push step_param past the last valid search step;
      // keep it inside [0, MAX_MVSEARCH_STEPS - 1] so the search routines
      // never receive an out-of-range step index.
      step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 1);
    }

    // prev_mv_sad is not setup for dynamically scaled frames.
    if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) {
      int i;
      for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
        if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
          // Another reference predicts this block far better: give up on
          // this reference without searching.
          x->pred_mv[ref].row = 0;
          x->pred_mv[ref].col = 0;
          tmp_mv->as_int = INVALID_MV;

          if (scaled_ref_frame) {
            for (plane = 0; plane < MAX_MB_PLANE; ++plane)
              xd->plane[plane].pre[0] = backup_yv12[plane];
          }
          return;
        }
      }
    }
  }

  // Note: MV limits are modified here. Always restore the original values
  // after full-pixel motion search.
  vp9_set_mv_search_range(&x->mv_limits, &ref_mv);

  mvp_full = pred_mv[x->mv_best_ref_index[ref]];

  // Convert the starting point from 1/8-pel units to full-pel units.
  mvp_full.col >>= 3;
  mvp_full.row >>= 3;

  bestsme = vp9_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, sadpb,
                                  cond_cost_list(cpi, cost_list), &ref_mv,
                                  &tmp_mv->as_mv, INT_MAX, 1);

  x->mv_limits = tmp_mv_limits;

  if (bestsme < INT_MAX) {
    uint32_t dis; /* TODO: use dis in distortion calculation later. */
    cpi->find_fractional_mv_step(
        x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
        &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
        cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
        x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
  }
  *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
                             x->mvcost, MV_COST_WEIGHT);

  if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = tmp_mv->as_mv;

  if (scaled_ref_frame) {
    for (plane = 0; plane < MAX_MB_PLANE; plane++)
      xd->plane[plane].pre[0] = backup_yv12[plane];
  }
}
2430 
restore_dst_buf(MACROBLOCKD * xd,uint8_t * orig_dst[MAX_MB_PLANE],int orig_dst_stride[MAX_MB_PLANE])2431 static INLINE void restore_dst_buf(MACROBLOCKD *xd,
2432                                    uint8_t *orig_dst[MAX_MB_PLANE],
2433                                    int orig_dst_stride[MAX_MB_PLANE]) {
2434   int i;
2435   for (i = 0; i < MAX_MB_PLANE; i++) {
2436     xd->plane[i].dst.buf = orig_dst[i];
2437     xd->plane[i].dst.stride = orig_dst_stride[i];
2438   }
2439 }
2440 
2441 // In some situations we want to discount tha pparent cost of a new motion
2442 // vector. Where there is a subtle motion field and especially where there is
2443 // low spatial complexity then it can be hard to cover the cost of a new motion
2444 // vector in a single block, even if that motion vector reduces distortion.
2445 // However, once established that vector may be usable through the nearest and
2446 // near mv modes to reduce distortion in subsequent blocks and also improve
2447 // visual quality.
discount_newmv_test(const VP9_COMP * cpi,int this_mode,int_mv this_mv,int_mv (* mode_mv)[MAX_REF_FRAMES],int ref_frame)2448 static int discount_newmv_test(const VP9_COMP *cpi, int this_mode,
2449                                int_mv this_mv,
2450                                int_mv (*mode_mv)[MAX_REF_FRAMES],
2451                                int ref_frame) {
2452   return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) &&
2453           (this_mv.as_int != 0) &&
2454           ((mode_mv[NEARESTMV][ref_frame].as_int == 0) ||
2455            (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) &&
2456           ((mode_mv[NEARMV][ref_frame].as_int == 0) ||
2457            (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV)));
2458 }
2459 
// Evaluates the inter mode currently stored in xd->mi[0] (mode and reference
// frames) for a block of size bsize.
//
// Steps, in order:
//  1. For NEWMV, obtain the motion vector(s): single_motion_search() for a
//     single reference, or the stored single_newmv[] results (optionally
//     refined by joint_motion_search()) for compound prediction. The mv rate
//     is added to *rate2.
//  2. Clamp / bounds-check the vectors and store them in mi->mv[].
//  3. Add the mode cost to *rate2 and bail out early if rate alone already
//     exceeds ref_best_rd (except for NEARESTMV).
//  4. Choose the interpolation filter using the modelled rd from
//     model_rd_for_sb(); filter_cache[] is reset and filled and *mask_filter
//     is raised to the largest modelled rd seen.
//  5. Unless the model marks the whole block as skippable, run the luma and
//     chroma transform rd (super_block_yrd / super_block_uvrd) and accumulate
//     their rate and distortion into *rate2, *distortion and *psse.
//
// *rate2 and *distortion are accumulated into (the caller supplies the
// starting values), except that *distortion is overwritten with the modelled
// skip sse when the skip path is taken, and both are set to INT_MAX /
// INT64_MAX when the transform rd search gives up. *skippable is written only
// when the transform rd ran. On the skip path x->skip and *disable_skip are
// set to 1.
//
// For single-reference modes the chosen filter and the skippable flag are
// recorded in single_filter[][] / single_skippable[][], and for single
// reference NEWMV the vector is recorded in single_newmv[].
//
// Returns INT64_MAX when the mode is rejected and 0 otherwise; the caller
// recomputes the actual rd cost from the outputs.
static int64_t handle_inter_mode(
    VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2,
    int64_t *distortion, int *skippable, int *rate_y, int *rate_uv,
    int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row,
    int mi_col, int_mv single_newmv[MAX_REF_FRAMES],
    INTERP_FILTER (*single_filter)[MAX_REF_FRAMES],
    int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse,
    const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) {
  VP9_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MODE_INFO *mi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const int is_comp_pred = has_second_ref(mi);
  const int this_mode = mi->mode;
  int_mv *frame_mv = mode_mv[this_mode];
  int i;
  // refs[1] falls back to 0 when there is no second reference so that it is
  // always a valid array index.
  int refs[2] = { mi->ref_frame[0],
                  (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) };
  int_mv cur_mv[2];
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]);
  uint8_t *tmp_buf;
#else
  DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  int pred_exists = 0;
  int intpel_mv;
  int64_t rd, tmp_rd, best_rd = INT64_MAX;
  int best_needs_copy = 0;
  uint8_t *orig_dst[MAX_MB_PLANE];
  int orig_dst_stride[MAX_MB_PLANE];
  int rs = 0;
  INTERP_FILTER best_filter = SWITCHABLE;
  uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 };
  int64_t bsse[MAX_MB_PLANE << 2] = { 0 };

  int bsl = mi_width_log2_lookup[bsize];
  int pred_filter_search =
      cpi->sf.cb_pred_filter_search
          ? (((mi_row + mi_col) >> bsl) +
             get_chessboard_index(cm->current_video_frame)) &
                0x1
          : 0;

  int skip_txfm_sb = 0;
  int64_t skip_sse_sb = INT64_MAX;
  int64_t distortion_y = 0, distortion_uv = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16);
  } else {
    tmp_buf = (uint8_t *)tmp_buf16;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Optionally predict the filter from the above / left neighbours instead of
  // searching for it.
  if (pred_filter_search) {
    INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE;
    if (xd->above_mi && is_inter_block(xd->above_mi))
      af = xd->above_mi->interp_filter;
    if (xd->left_mi && is_inter_block(xd->left_mi))
      lf = xd->left_mi->interp_filter;

    if ((this_mode != NEWMV) || (af == lf)) best_filter = af;
  }

  if (is_comp_pred) {
    if (frame_mv[refs[0]].as_int == INVALID_MV ||
        frame_mv[refs[1]].as_int == INVALID_MV)
      return INT64_MAX;

    // If both single-reference searches agreed on a filter, reuse it.
    if (cpi->sf.adaptive_mode_search) {
      if (single_filter[this_mode][refs[0]] ==
          single_filter[this_mode][refs[1]])
        best_filter = single_filter[this_mode][refs[0]];
    }
  }

  if (this_mode == NEWMV) {
    int rate_mv;
    if (is_comp_pred) {
      // Initialize mv using single prediction mode result.
      frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int;
      frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int;

      if (cpi->sf.comp_inter_joint_search_thresh <= bsize) {
        joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col,
                            single_newmv, &rate_mv);
      } else {
        rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv,
                                  &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv,
                                  x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
        rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv,
                                   &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv,
                                   x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
      }
      *rate2 += rate_mv;
    } else {
      int_mv tmp_mv;
      single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv);
      // rate_mv is not written when the search bails out with INVALID_MV, so
      // this check must come before any use of it.
      if (tmp_mv.as_int == INVALID_MV) return INT64_MAX;

      frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int =
          tmp_mv.as_int;
      single_newmv[refs[0]].as_int = tmp_mv.as_int;

      // Estimate the rate implications of a new mv but discount this
      // under certain circumstances where we want to help initiate a weak
      // motion field, where the distortion gain for a single block may not
      // be enough to overcome the cost of a new mv.
      if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0])) {
        *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1);
      } else {
        *rate2 += rate_mv;
      }
    }
  }

  for (i = 0; i < is_comp_pred + 1; ++i) {
    cur_mv[i] = frame_mv[refs[i]];
    // Clip "next_nearest" so that it does not extend to far out of image
    if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd);

    if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX;
    mi->mv[i].as_int = cur_mv[i].as_int;
  }

  // do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  for (i = 0; i < MAX_MB_PLANE; i++) {
    orig_dst[i] = xd->plane[i].dst.buf;
    orig_dst_stride[i] = xd->plane[i].dst.stride;
  }

  // We don't include the cost of the second reference here, because there
  // are only two options: Last/ARF or Golden/ARF; The second one is always
  // known, which is ARF.
  //
  // Under some circumstances we discount the cost of new mv mode to encourage
  // initiation of a motion field.
  if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv,
                          refs[0])) {
    *rate2 +=
        VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]),
               cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]]));
  } else {
    *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]);
  }

  // Rate alone already exceeds the best rd so far: not worth continuing.
  if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd &&
      mi->mode != NEARESTMV)
    return INT64_MAX;

  // Are all MVs integer pel for Y and UV
  intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv);
  if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv);

  // Search for best switchable filter by checking the variance of
  // pred error irrespective of whether the filter will be used
  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;

  if (cm->interp_filter != BILINEAR) {
    if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) {
      best_filter = EIGHTTAP;
    } else if (best_filter == SWITCHABLE) {
      int newbest;
      int tmp_rate_sum = 0;
      int64_t tmp_dist_sum = 0;

      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        int j;
        int64_t rs_rd;
        int tmp_skip_sb = 0;
        int64_t tmp_skip_sse = INT64_MAX;

        mi->interp_filter = i;
        rs = vp9_get_switchable_rate(cpi, xd);
        rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);

        if (i > 0 && intpel_mv) {
          // With integer-pel vectors the modelled rate / distortion of the
          // first filter is reused for the remaining filters.
          rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);
        } else {
          int rate_sum = 0;
          int64_t dist_sum = 0;
          // Filter excluded by the speed-feature mask: skip it entirely. rd
          // keeps the value from the previous iteration.
          if (i > 0 && cpi->sf.adaptive_interp_filter_search &&
              (cpi->sf.interp_filter_search_mask & (1 << i)))
            continue;

          if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) ||
              (cm->interp_filter != SWITCHABLE &&
               (cm->interp_filter == mi->interp_filter ||
                (i == 0 && intpel_mv)))) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
          } else {
            for (j = 0; j < MAX_MB_PLANE; j++) {
              xd->plane[j].dst.buf = tmp_buf + j * 64 * 64;
              xd->plane[j].dst.stride = 64;
            }
          }
          vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
          model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &tmp_skip_sb,
                          &tmp_skip_sse);

          rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum);
          filter_cache[i] = rd;
          filter_cache[SWITCHABLE_FILTERS] =
              VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd);
          if (cm->interp_filter == SWITCHABLE) rd += rs_rd;
          *mask_filter = VPXMAX(*mask_filter, rd);

          if (i == 0 && intpel_mv) {
            tmp_rate_sum = rate_sum;
            tmp_dist_sum = dist_sum;
          }
        }

        if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
          if (rd / 2 > ref_best_rd) {
            restore_dst_buf(xd, orig_dst, orig_dst_stride);
            return INT64_MAX;
          }
        }
        newbest = i == 0 || rd < best_rd;

        if (newbest) {
          best_rd = rd;
          best_filter = mi->interp_filter;
          // The best predictor now lives in the other of the two buffers
          // (destination vs. tmp_buf).
          if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
            best_needs_copy = !best_needs_copy;
        }

        if ((cm->interp_filter == SWITCHABLE && newbest) ||
            (cm->interp_filter != SWITCHABLE &&
             cm->interp_filter == mi->interp_filter)) {
          pred_exists = 1;
          tmp_rd = best_rd;

          skip_txfm_sb = tmp_skip_sb;
          skip_sse_sb = tmp_skip_sse;
          memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
          memcpy(bsse, x->bsse, sizeof(bsse));
        }
      }
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
    }
  }
  // Set the appropriate filter
  mi->interp_filter =
      cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter;
  rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0;

  if (pred_exists) {
    // tmp_rd is always assigned on the path that sets pred_exists.
    if (best_needs_copy) {
      // again temporarily set the buffers to local memory to prevent a memcpy
      for (i = 0; i < MAX_MB_PLANE; i++) {
        xd->plane[i].dst.buf = tmp_buf + i * 64 * 64;
        xd->plane[i].dst.stride = 64;
      }
    }
    rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0);
  } else {
    int tmp_rate;
    int64_t tmp_dist;
    // Handles the special case when a filter that is not in the
    // switchable list (ex. bilinear) is indicated at the frame level, or
    // skip condition holds.
    vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize);
    model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb,
                    &skip_sse_sb);
    rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist);
    memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
    memcpy(bsse, x->bsse, sizeof(bsse));
  }

  if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter;

  // If both single-reference predictions were skippable, force the skip
  // flags for the compound prediction too.
  if (cpi->sf.adaptive_mode_search)
    if (is_comp_pred)
      if (single_skippable[this_mode][refs[0]] &&
          single_skippable[this_mode][refs[1]])
        memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm));

  if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) {
    // if current pred_error modeled rd is substantially more than the best
    // so far, do not bother doing full rd
    if (rd / 2 > ref_best_rd) {
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }
  }

  if (cm->interp_filter == SWITCHABLE) *rate2 += rs;

  // Publish the skip flags / sse that belong to the selected predictor.
  memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
  memcpy(x->bsse, bsse, sizeof(bsse));

  if (!skip_txfm_sb) {
    int skippable_y, skippable_uv;
    int64_t sseuv = INT64_MAX;
    int64_t rdcosty = INT64_MAX;

    // Y cost and distortion
    vp9_subtract_plane(x, bsize, 0);
    super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize,
                    ref_best_rd);

    if (*rate_y == INT_MAX) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *rate2 += *rate_y;
    *distortion += distortion_y;

    // The luma rd spent so far (or the cost of skipping, if cheaper) reduces
    // the budget handed to the chroma search.
    rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion);
    rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse));

    if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv,
                          &sseuv, bsize, ref_best_rd - rdcosty)) {
      *rate2 = INT_MAX;
      *distortion = INT64_MAX;
      restore_dst_buf(xd, orig_dst, orig_dst_stride);
      return INT64_MAX;
    }

    *psse += sseuv;
    *rate2 += *rate_uv;
    *distortion += distortion_uv;
    *skippable = skippable_y && skippable_uv;
  } else {
    x->skip = 1;
    *disable_skip = 1;

    // The cost of skip bit needs to be added.
    *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);

    *distortion = skip_sse_sb;
  }

  if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable;

  restore_dst_buf(xd, orig_dst, orig_dst_stride);
  return 0;  // The rate-distortion cost will be re-calculated by caller.
}
2818 
// Picks the best intra coding of the block: a luma mode search (whole-block
// for bsize >= BLOCK_8X8, per sub-block otherwise) followed by a chroma mode
// search.
//
// On success rd_cost->rate, rd_cost->dist and rd_cost->rdcost describe the
// chosen coding and ctx receives a copy of the block's mode info and extended
// mode info. If the luma search cannot beat best_rd, rd_cost->rate is set to
// INT_MAX and the function returns without touching the other rd_cost fields
// or the ctx copies.
void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost,
                               BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                               int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblockd_plane *const pd = xd->plane;
  int rate_y = 0, rate_y_tokenonly = 0;
  int rate_uv = 0, rate_uv_tokenonly = 0;
  int64_t dist_y = 0, dist_uv = 0;
  int y_skip = 0, uv_skip = 0;
  int64_t luma_rd;
  TX_SIZE max_uv_tx_size;

  x->skip_encode = 0;
  ctx->skip = 0;
  xd->mi[0]->ref_frame[0] = INTRA_FRAME;
  xd->mi[0]->ref_frame[1] = NONE;
  // Initialize interp_filter here so we do not have to check for inter block
  // modes in get_pred_context_switchable_interp()
  xd->mi[0]->interp_filter = SWITCHABLE_FILTERS;

  // Luma: the sub-8x8 search does not report a skip flag, so y_skip keeps
  // its initial value of 0 on that path.
  if (bsize >= BLOCK_8X8) {
    luma_rd = rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                     &dist_y, &y_skip, bsize, best_rd);
  } else {
    luma_rd = rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly,
                                           &dist_y, best_rd);
  }
  if (luma_rd >= best_rd) {
    rd_cost->rate = INT_MAX;
    return;
  }

  // Chroma: the largest chroma transform follows from the luma transform
  // size and the chroma subsampling.
  max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size]
                                   [pd[1].subsampling_x][pd[1].subsampling_y];
  rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv,
                          &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size);

  if (y_skip && uv_skip) {
    // Everything is skippable: drop the token costs and pay for skip = 1.
    rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly +
                    vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1);
  } else {
    rd_cost->rate =
        rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0);
  }
  rd_cost->dist = dist_y + dist_uv;

  ctx->mic = *xd->mi[0];
  ctx->mbmi_ext = *x->mbmi_ext;
  rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
}
2870 
2871 // This function is designed to apply a bias or adjustment to an rd value based
2872 // on the relative variance of the source and reconstruction.
2873 #define LOW_VAR_THRESH 16
2874 #define VLOW_ADJ_MAX 25
2875 #define VHIGH_ADJ_MAX 8
rd_variance_adjustment(VP9_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int64_t * this_rd,MV_REFERENCE_FRAME ref_frame,unsigned int source_variance)2876 static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
2877                                    BLOCK_SIZE bsize, int64_t *this_rd,
2878                                    MV_REFERENCE_FRAME ref_frame,
2879                                    unsigned int source_variance) {
2880   MACROBLOCKD *const xd = &x->e_mbd;
2881   unsigned int recon_variance;
2882   unsigned int absvar_diff = 0;
2883   int64_t var_error = 0;
2884   int64_t var_factor = 0;
2885 
2886   if (*this_rd == INT64_MAX) return;
2887 
2888 #if CONFIG_VP9_HIGHBITDEPTH
2889   if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
2890     recon_variance = vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst,
2891                                                         bsize, xd->bd);
2892   } else {
2893     recon_variance =
2894         vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
2895   }
2896 #else
2897   recon_variance = vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
2898 #endif  // CONFIG_VP9_HIGHBITDEPTH
2899 
2900   if ((source_variance + recon_variance) > LOW_VAR_THRESH) {
2901     absvar_diff = (source_variance > recon_variance)
2902                       ? (source_variance - recon_variance)
2903                       : (recon_variance - source_variance);
2904 
2905     var_error = ((int64_t)200 * source_variance * recon_variance) /
2906                 (((int64_t)source_variance * source_variance) +
2907                  ((int64_t)recon_variance * recon_variance));
2908     var_error = 100 - var_error;
2909   }
2910 
2911   // Source variance above a threshold and ref frame is intra.
2912   // This case is targeted mainly at discouraging intra modes that give rise
2913   // to a predictor with a low spatial complexity compared to the source.
2914   if ((source_variance > LOW_VAR_THRESH) && (ref_frame == INTRA_FRAME) &&
2915       (source_variance > recon_variance)) {
2916     var_factor = VPXMIN(absvar_diff, VPXMIN(VLOW_ADJ_MAX, var_error));
2917     // A second possible case of interest is where the source variance
2918     // is very low and we wish to discourage false texture or motion trails.
2919   } else if ((source_variance < (LOW_VAR_THRESH >> 1)) &&
2920              (recon_variance > source_variance)) {
2921     var_factor = VPXMIN(absvar_diff, VPXMIN(VHIGH_ADJ_MAX, var_error));
2922   }
2923   *this_rd += (*this_rd * var_factor) / 100;
2924 }
2925 
2926 // Do we have an internal image edge (e.g. formatting bars).
vp9_internal_image_edge(VP9_COMP * cpi)2927 int vp9_internal_image_edge(VP9_COMP *cpi) {
2928   return (cpi->oxcf.pass == 2) &&
2929          ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) ||
2930           (cpi->twopass.this_frame_stats.inactive_zone_cols > 0));
2931 }
2932 
2933 // Checks to see if a super block is on a horizontal image edge.
2934 // In most cases this is the "real" edge unless there are formatting
2935 // bars embedded in the stream.
vp9_active_h_edge(VP9_COMP * cpi,int mi_row,int mi_step)2936 int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) {
2937   int top_edge = 0;
2938   int bottom_edge = cpi->common.mi_rows;
2939   int is_active_h_edge = 0;
2940 
2941   // For two pass account for any formatting bars detected.
2942   if (cpi->oxcf.pass == 2) {
2943     TWO_PASS *twopass = &cpi->twopass;
2944 
2945     // The inactive region is specified in MBs not mi units.
2946     // The image edge is in the following MB row.
2947     top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2948 
2949     bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2950     bottom_edge = VPXMAX(top_edge, bottom_edge);
2951   }
2952 
2953   if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2954       ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2955     is_active_h_edge = 1;
2956   }
2957   return is_active_h_edge;
2958 }
2959 
2960 // Checks to see if a super block is on a vertical image edge.
2961 // In most cases this is the "real" edge unless there are formatting
2962 // bars embedded in the stream.
vp9_active_v_edge(VP9_COMP * cpi,int mi_col,int mi_step)2963 int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) {
2964   int left_edge = 0;
2965   int right_edge = cpi->common.mi_cols;
2966   int is_active_v_edge = 0;
2967 
2968   // For two pass account for any formatting bars detected.
2969   if (cpi->oxcf.pass == 2) {
2970     TWO_PASS *twopass = &cpi->twopass;
2971 
2972     // The inactive region is specified in MBs not mi units.
2973     // The image edge is in the following MB row.
2974     left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2975 
2976     right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2977     right_edge = VPXMAX(left_edge, right_edge);
2978   }
2979 
2980   if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2981       ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2982     is_active_v_edge = 1;
2983   }
2984   return is_active_v_edge;
2985 }
2986 
2987 // Checks to see if a super block is at the edge of the active image.
2988 // In most cases this is the "real" edge unless there are formatting
2989 // bars embedded in the stream.
vp9_active_edge_sb(VP9_COMP * cpi,int mi_row,int mi_col)2990 int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) {
2991   return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) ||
2992          vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE);
2993 }
2994 
vp9_rd_pick_inter_mode_sb(VP9_COMP * cpi,TileDataEnc * tile_data,MACROBLOCK * x,int mi_row,int mi_col,RD_COST * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)2995 void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
2996                                MACROBLOCK *x, int mi_row, int mi_col,
2997                                RD_COST *rd_cost, BLOCK_SIZE bsize,
2998                                PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) {
2999   VP9_COMMON *const cm = &cpi->common;
3000   TileInfo *const tile_info = &tile_data->tile_info;
3001   RD_OPT *const rd_opt = &cpi->rd;
3002   SPEED_FEATURES *const sf = &cpi->sf;
3003   MACROBLOCKD *const xd = &x->e_mbd;
3004   MODE_INFO *const mi = xd->mi[0];
3005   MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
3006   const struct segmentation *const seg = &cm->seg;
3007   PREDICTION_MODE this_mode;
3008   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3009   unsigned char segment_id = mi->segment_id;
3010   int comp_pred, i, k;
3011   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3012   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3013   int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } };
3014   INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES];
3015   int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES];
3016   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3017                                     VP9_ALT_FLAG };
3018   int64_t best_rd = best_rd_so_far;
3019   int64_t best_pred_diff[REFERENCE_MODES];
3020   int64_t best_pred_rd[REFERENCE_MODES];
3021   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3022   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3023   MODE_INFO best_mbmode;
3024   int best_mode_skippable = 0;
3025   int midx, best_mode_index = -1;
3026   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3027   vpx_prob comp_mode_p;
3028   int64_t best_intra_rd = INT64_MAX;
3029   unsigned int best_pred_sse = UINT_MAX;
3030   PREDICTION_MODE best_intra_mode = DC_PRED;
3031   int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES];
3032   int64_t dist_uv[TX_SIZES];
3033   int skip_uv[TX_SIZES];
3034   PREDICTION_MODE mode_uv[TX_SIZES];
3035   const int intra_cost_penalty = vp9_get_intra_cost_penalty(
3036       cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3037   int best_skip2 = 0;
3038   uint8_t ref_frame_skip_mask[2] = { 0 };
3039   uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
3040   int mode_skip_start = sf->mode_skip_start + 1;
3041   const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
3042   const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
3043   int64_t mode_threshold[MAX_MODES];
3044   int *mode_map = tile_data->mode_map[bsize];
3045   const int mode_search_skip_flags = sf->mode_search_skip_flags;
3046   int64_t mask_filter = 0;
3047   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
3048 
3049   vp9_zero(best_mbmode);
3050 
3051   x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3052 
3053   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
3054 
3055   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3056                            &comp_mode_p);
3057 
3058   for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
3059   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3060     best_filter_rd[i] = INT64_MAX;
3061   for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX;
3062   for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
3063   for (i = 0; i < MB_MODE_COUNT; ++i) {
3064     for (k = 0; k < MAX_REF_FRAMES; ++k) {
3065       single_inter_filter[i][k] = SWITCHABLE;
3066       single_skippable[i][k] = 0;
3067     }
3068   }
3069 
3070   rd_cost->rate = INT_MAX;
3071 
3072   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3073     x->pred_mv_sad[ref_frame] = INT_MAX;
3074     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3075       assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
3076       setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
3077                          frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3078     }
3079     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3080     frame_mv[ZEROMV][ref_frame].as_int = 0;
3081   }
3082 
3083   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
3084     if (!(cpi->ref_frame_flags & flag_list[ref_frame])) {
3085       // Skip checking missing references in both single and compound reference
3086       // modes. Note that a mode will be skipped if both reference frames
3087       // are masked out.
3088       ref_frame_skip_mask[0] |= (1 << ref_frame);
3089       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3090     } else if (sf->reference_masking) {
3091       for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
3092         // Skip fixed mv modes for poor references
3093         if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) {
3094           mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
3095           break;
3096         }
3097       }
3098     }
3099     // If the segment reference frame feature is enabled....
3100     // then do nothing if the current ref frame is not allowed..
3101     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3102         get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3103       ref_frame_skip_mask[0] |= (1 << ref_frame);
3104       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3105     }
3106   }
3107 
3108   // Disable this drop out case if the ref frame
3109   // segment level feature is enabled for this segment. This is to
3110   // prevent the possibility that we end up unable to pick any mode.
3111   if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3112     // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3113     // unless ARNR filtering is enabled in which case we want
3114     // an unfiltered alternative. We allow near/nearest as well
3115     // because they may result in zero-zero MVs but be cheaper.
3116     if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) {
3117       ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
3118       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
3119       mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
3120       if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0)
3121         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV);
3122       if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0)
3123         mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV);
3124     }
3125   }
3126 
3127   if (cpi->rc.is_src_frame_alt_ref) {
3128     if (sf->alt_ref_search_fp) {
3129       mode_skip_mask[ALTREF_FRAME] = 0;
3130       ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME);
3131       ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK;
3132     }
3133   }
3134 
3135   if (sf->alt_ref_search_fp)
3136     if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX)
3137       if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1))
3138         mode_skip_mask[ALTREF_FRAME] |= INTER_ALL;
3139 
3140   if (sf->adaptive_mode_search) {
3141     if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref &&
3142         cpi->rc.frames_since_golden >= 3)
3143       if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1))
3144         mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL;
3145   }
3146 
3147   if (bsize > sf->max_intra_bsize) {
3148     ref_frame_skip_mask[0] |= (1 << INTRA_FRAME);
3149     ref_frame_skip_mask[1] |= (1 << INTRA_FRAME);
3150   }
3151 
3152   mode_skip_mask[INTRA_FRAME] |=
3153       ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]);
3154 
3155   for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0;
3156   for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
3157     mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5;
3158 
3159   midx = sf->schedule_mode_search ? mode_skip_start : 0;
3160   while (midx > 4) {
3161     uint8_t end_pos = 0;
3162     for (i = 5; i < midx; ++i) {
3163       if (mode_threshold[mode_map[i - 1]] > mode_threshold[mode_map[i]]) {
3164         uint8_t tmp = mode_map[i];
3165         mode_map[i] = mode_map[i - 1];
3166         mode_map[i - 1] = tmp;
3167         end_pos = i;
3168       }
3169     }
3170     midx = end_pos;
3171   }
3172 
3173   for (midx = 0; midx < MAX_MODES; ++midx) {
3174     int mode_index = mode_map[midx];
3175     int mode_excluded = 0;
3176     int64_t this_rd = INT64_MAX;
3177     int disable_skip = 0;
3178     int compmode_cost = 0;
3179     int rate2 = 0, rate_y = 0, rate_uv = 0;
3180     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3181     int skippable = 0;
3182     int this_skip2 = 0;
3183     int64_t total_sse = INT64_MAX;
3184     int early_term = 0;
3185 
3186     this_mode = vp9_mode_order[mode_index].mode;
3187     ref_frame = vp9_mode_order[mode_index].ref_frame[0];
3188     second_ref_frame = vp9_mode_order[mode_index].ref_frame[1];
3189 
3190     // Look at the reference frame of the best mode so far and set the
3191     // skip mask to look at a subset of the remaining modes.
3192     if (midx == mode_skip_start && best_mode_index >= 0) {
3193       switch (best_mbmode.ref_frame[0]) {
3194         case INTRA_FRAME: break;
3195         case LAST_FRAME:
3196           ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
3197           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3198           break;
3199         case GOLDEN_FRAME:
3200           ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
3201           ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3202           break;
3203         case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break;
3204         case NONE:
3205         case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
3206       }
3207     }
3208 
3209     if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
3210         (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
3211       continue;
3212 
3213     if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue;
3214 
3215     // Test best rd so far against threshold for trying this mode.
3216     if (best_mode_skippable && sf->schedule_mode_search)
3217       mode_threshold[mode_index] <<= 1;
3218 
3219     if (best_rd < mode_threshold[mode_index]) continue;
3220 
3221     if (sf->motion_field_mode_search) {
3222       const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize],
3223                                   tile_info->mi_col_end - mi_col);
3224       const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize],
3225                                    tile_info->mi_row_end - mi_row);
3226       const int bsl = mi_width_log2_lookup[bsize];
3227       int cb_partition_search_ctrl =
3228           (((mi_row + mi_col) >> bsl) +
3229            get_chessboard_index(cm->current_video_frame)) &
3230           0x1;
3231       MODE_INFO *ref_mi;
3232       int const_motion = 1;
3233       int skip_ref_frame = !cb_partition_search_ctrl;
3234       MV_REFERENCE_FRAME rf = NONE;
3235       int_mv ref_mv;
3236       ref_mv.as_int = INVALID_MV;
3237 
3238       if ((mi_row - 1) >= tile_info->mi_row_start) {
3239         ref_mv = xd->mi[-xd->mi_stride]->mv[0];
3240         rf = xd->mi[-xd->mi_stride]->ref_frame[0];
3241         for (i = 0; i < mi_width; ++i) {
3242           ref_mi = xd->mi[-xd->mi_stride + i];
3243           const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
3244                           (ref_frame == ref_mi->ref_frame[0]);
3245           skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
3246         }
3247       }
3248 
3249       if ((mi_col - 1) >= tile_info->mi_col_start) {
3250         if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0];
3251         if (rf == NONE) rf = xd->mi[-1]->ref_frame[0];
3252         for (i = 0; i < mi_height; ++i) {
3253           ref_mi = xd->mi[i * xd->mi_stride - 1];
3254           const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) &&
3255                           (ref_frame == ref_mi->ref_frame[0]);
3256           skip_ref_frame &= (rf == ref_mi->ref_frame[0]);
3257         }
3258       }
3259 
3260       if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV)
3261         if (rf > INTRA_FRAME)
3262           if (ref_frame != rf) continue;
3263 
3264       if (const_motion)
3265         if (this_mode == NEARMV || this_mode == ZEROMV) continue;
3266     }
3267 
3268     comp_pred = second_ref_frame > INTRA_FRAME;
3269     if (comp_pred) {
3270       if (!cpi->allow_comp_inter_inter) continue;
3271 
3272       // Skip compound inter modes if ARF is not available.
3273       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
3274 
3275       // Do not allow compound prediction if the segment level reference frame
3276       // feature is in use as in this case there can only be one reference.
3277       if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
3278 
3279       if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3280           best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME)
3281         continue;
3282 
3283       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3284     } else {
3285       if (ref_frame != INTRA_FRAME)
3286         mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3287     }
3288 
3289     if (ref_frame == INTRA_FRAME) {
3290       if (sf->adaptive_mode_search)
3291         if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse)
3292           continue;
3293 
3294       if (this_mode != DC_PRED) {
3295         // Disable intra modes other than DC_PRED for blocks with low variance
3296         // Threshold for intra skipping based on source variance
3297         // TODO(debargha): Specialize the threshold for super block sizes
3298         const unsigned int skip_intra_var_thresh = 64;
3299         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
3300             x->source_variance < skip_intra_var_thresh)
3301           continue;
3302         // Only search the oblique modes if the best so far is
3303         // one of the neighboring directional modes
3304         if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
3305             (this_mode >= D45_PRED && this_mode <= TM_PRED)) {
3306           if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME)
3307             continue;
3308         }
3309         if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
3310           if (conditional_skipintra(this_mode, best_intra_mode)) continue;
3311         }
3312       }
3313     } else {
3314       const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame };
3315       if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode,
3316                               ref_frames))
3317         continue;
3318     }
3319 
3320     mi->mode = this_mode;
3321     mi->uv_mode = DC_PRED;
3322     mi->ref_frame[0] = ref_frame;
3323     mi->ref_frame[1] = second_ref_frame;
3324     // Evaluate all sub-pel filters irrespective of whether we can use
3325     // them for this frame.
3326     mi->interp_filter =
3327         cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
3328     mi->mv[0].as_int = mi->mv[1].as_int = 0;
3329 
3330     x->skip = 0;
3331     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3332 
3333     // Select prediction reference frames.
3334     for (i = 0; i < MAX_MB_PLANE; i++) {
3335       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3336       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3337     }
3338 
3339     if (ref_frame == INTRA_FRAME) {
3340       TX_SIZE uv_tx;
3341       struct macroblockd_plane *const pd = &xd->plane[1];
3342       memset(x->skip_txfm, 0, sizeof(x->skip_txfm));
3343       super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize,
3344                       best_rd);
3345       if (rate_y == INT_MAX) continue;
3346 
3347       uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x]
3348                               [pd->subsampling_y];
3349       if (rate_uv_intra[uv_tx] == INT_MAX) {
3350         choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx],
3351                              &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx],
3352                              &skip_uv[uv_tx], &mode_uv[uv_tx]);
3353       }
3354 
3355       rate_uv = rate_uv_tokenonly[uv_tx];
3356       distortion_uv = dist_uv[uv_tx];
3357       skippable = skippable && skip_uv[uv_tx];
3358       mi->uv_mode = mode_uv[uv_tx];
3359 
3360       rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx];
3361       if (this_mode != DC_PRED && this_mode != TM_PRED)
3362         rate2 += intra_cost_penalty;
3363       distortion2 = distortion_y + distortion_uv;
3364     } else {
3365       this_rd = handle_inter_mode(
3366           cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv,
3367           &disable_skip, frame_mv, mi_row, mi_col, single_newmv,
3368           single_inter_filter, single_skippable, &total_sse, best_rd,
3369           &mask_filter, filter_cache);
3370       if (this_rd == INT64_MAX) continue;
3371 
3372       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
3373 
3374       if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
3375     }
3376 
3377     // Estimate the reference frame signaling cost and add it
3378     // to the rolling cost variable.
3379     if (comp_pred) {
3380       rate2 += ref_costs_comp[ref_frame];
3381     } else {
3382       rate2 += ref_costs_single[ref_frame];
3383     }
3384 
3385     if (!disable_skip) {
3386       const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
3387       const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
3388       const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
3389 
3390       if (skippable) {
3391         // Back out the coefficient coding costs
3392         rate2 -= (rate_y + rate_uv);
3393 
3394         // Cost the skip mb case
3395         rate2 += skip_cost1;
3396       } else if (ref_frame != INTRA_FRAME && !xd->lossless) {
3397         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
3398                    distortion2) <
3399             RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
3400           // Add in the cost of the no skip flag.
3401           rate2 += skip_cost0;
3402         } else {
3403           // FIXME(rbultje) make this work for splitmv also
3404           assert(total_sse >= 0);
3405 
3406           rate2 += skip_cost1;
3407           distortion2 = total_sse;
3408           rate2 -= (rate_y + rate_uv);
3409           this_skip2 = 1;
3410         }
3411       } else {
3412         // Add in the cost of the no skip flag.
3413         rate2 += skip_cost0;
3414       }
3415 
3416       // Calculate the final RD estimate for this mode.
3417       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
3418     }
3419 
3420     // Apply an adjustment to the rd value based on the similarity of the
3421     // source variance and reconstructed variance.
3422     rd_variance_adjustment(cpi, x, bsize, &this_rd, ref_frame,
3423                            x->source_variance);
3424 
3425     if (ref_frame == INTRA_FRAME) {
3426       // Keep record of best intra rd
3427       if (this_rd < best_intra_rd) {
3428         best_intra_rd = this_rd;
3429         best_intra_mode = mi->mode;
3430       }
3431     }
3432 
3433     if (!disable_skip && ref_frame == INTRA_FRAME) {
3434       for (i = 0; i < REFERENCE_MODES; ++i)
3435         best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
3436       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3437         best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
3438     }
3439 
3440     // Did this mode help.. i.e. is it the new best mode
3441     if (this_rd < best_rd || x->skip) {
3442       int max_plane = MAX_MB_PLANE;
3443       if (!mode_excluded) {
3444         // Note index of best mode so far
3445         best_mode_index = mode_index;
3446 
3447         if (ref_frame == INTRA_FRAME) {
3448           /* required for left and above block mv */
3449           mi->mv[0].as_int = 0;
3450           max_plane = 1;
3451           // Initialize interp_filter here so we do not have to check for
3452           // inter block modes in get_pred_context_switchable_interp()
3453           mi->interp_filter = SWITCHABLE_FILTERS;
3454         } else {
3455           best_pred_sse = x->pred_sse[ref_frame];
3456         }
3457 
3458         rd_cost->rate = rate2;
3459         rd_cost->dist = distortion2;
3460         rd_cost->rdcost = this_rd;
3461         best_rd = this_rd;
3462         best_mbmode = *mi;
3463         best_skip2 = this_skip2;
3464         best_mode_skippable = skippable;
3465 
3466         if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
3467         memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size],
3468                sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
3469 
3470         // TODO(debargha): enhance this test with a better distortion prediction
3471         // based on qp, activity mask and history
3472         if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
3473             (mode_index > MIN_EARLY_TERM_INDEX)) {
3474           int qstep = xd->plane[0].dequant[1];
3475           // TODO(debargha): Enhance this by specializing for each mode_index
3476           int scale = 4;
3477 #if CONFIG_VP9_HIGHBITDEPTH
3478           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
3479             qstep >>= (xd->bd - 8);
3480           }
3481 #endif  // CONFIG_VP9_HIGHBITDEPTH
3482           if (x->source_variance < UINT_MAX) {
3483             const int var_adjust = (x->source_variance < 16);
3484             scale -= var_adjust;
3485           }
3486           if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
3487             early_term = 1;
3488           }
3489         }
3490       }
3491     }
3492 
3493     /* keep record of best compound/single-only prediction */
3494     if (!disable_skip && ref_frame != INTRA_FRAME) {
3495       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
3496 
3497       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
3498         single_rate = rate2 - compmode_cost;
3499         hybrid_rate = rate2;
3500       } else {
3501         single_rate = rate2;
3502         hybrid_rate = rate2 + compmode_cost;
3503       }
3504 
3505       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
3506       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
3507 
3508       if (!comp_pred) {
3509         if (single_rd < best_pred_rd[SINGLE_REFERENCE])
3510           best_pred_rd[SINGLE_REFERENCE] = single_rd;
3511       } else {
3512         if (single_rd < best_pred_rd[COMPOUND_REFERENCE])
3513           best_pred_rd[COMPOUND_REFERENCE] = single_rd;
3514       }
3515       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
3516         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
3517 
3518       /* keep record of best filter type */
3519       if (!mode_excluded && cm->interp_filter != BILINEAR) {
3520         int64_t ref =
3521             filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
3522                                                          : cm->interp_filter];
3523 
3524         for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3525           int64_t adj_rd;
3526           if (ref == INT64_MAX)
3527             adj_rd = 0;
3528           else if (filter_cache[i] == INT64_MAX)
3529             // when early termination is triggered, the encoder does not have
3530             // access to the rate-distortion cost. it only knows that the cost
3531             // should be above the maximum valid value. hence it takes the known
3532             // maximum plus an arbitrary constant as the rate-distortion cost.
3533             adj_rd = mask_filter - ref + 10;
3534           else
3535             adj_rd = filter_cache[i] - ref;
3536 
3537           adj_rd += this_rd;
3538           best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
3539         }
3540       }
3541     }
3542 
3543     if (early_term) break;
3544 
3545     if (x->skip && !comp_pred) break;
3546   }
3547 
3548   // The inter modes' rate costs are not calculated precisely in some cases.
3549   // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and
3550   // ZEROMV. Here, checks are added for those cases, and the mode decisions
3551   // are corrected.
3552   if (best_mbmode.mode == NEWMV) {
3553     const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0],
3554                                          best_mbmode.ref_frame[1] };
3555     int comp_pred_mode = refs[1] > INTRA_FRAME;
3556 
3557     if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3558         ((comp_pred_mode &&
3559           frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
3560          !comp_pred_mode))
3561       best_mbmode.mode = NEARESTMV;
3562     else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int &&
3563              ((comp_pred_mode &&
3564                frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) ||
3565               !comp_pred_mode))
3566       best_mbmode.mode = NEARMV;
3567     else if (best_mbmode.mv[0].as_int == 0 &&
3568              ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) ||
3569               !comp_pred_mode))
3570       best_mbmode.mode = ZEROMV;
3571   }
3572 
3573   if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
3574     rd_cost->rate = INT_MAX;
3575     rd_cost->rdcost = INT64_MAX;
3576     return;
3577   }
3578 
3579   // If we used an estimate for the uv intra rd in the loop above...
3580   if (sf->use_uv_intra_rd_estimate) {
3581     // Do Intra UV best rd mode selection if best mode choice above was intra.
3582     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
3583       TX_SIZE uv_tx_size;
3584       *mi = best_mbmode;
3585       uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]);
3586       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size],
3587                               &rate_uv_tokenonly[uv_tx_size],
3588                               &dist_uv[uv_tx_size], &skip_uv[uv_tx_size],
3589                               bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize,
3590                               uv_tx_size);
3591     }
3592   }
3593 
3594   assert((cm->interp_filter == SWITCHABLE) ||
3595          (cm->interp_filter == best_mbmode.interp_filter) ||
3596          !is_inter_block(&best_mbmode));
3597 
3598   if (!cpi->rc.is_src_frame_alt_ref)
3599     vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
3600                               sf->adaptive_rd_thresh, bsize, best_mode_index);
3601 
3602   // macroblock modes
3603   *mi = best_mbmode;
3604   x->skip |= best_skip2;
3605 
3606   for (i = 0; i < REFERENCE_MODES; ++i) {
3607     if (best_pred_rd[i] == INT64_MAX)
3608       best_pred_diff[i] = INT_MIN;
3609     else
3610       best_pred_diff[i] = best_rd - best_pred_rd[i];
3611   }
3612 
3613   if (!x->skip) {
3614     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
3615       if (best_filter_rd[i] == INT64_MAX)
3616         best_filter_diff[i] = 0;
3617       else
3618         best_filter_diff[i] = best_rd - best_filter_rd[i];
3619     }
3620     if (cm->interp_filter == SWITCHABLE)
3621       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
3622   } else {
3623     vp9_zero(best_filter_diff);
3624   }
3625 
3626   // TODO(yunqingwang): Moving this line in front of the above best_filter_diff
3627   // updating code causes PSNR loss. Need to figure out the confliction.
3628   x->skip |= best_mode_skippable;
3629 
3630   if (!x->skip && !x->select_tx_size) {
3631     int has_high_freq_coeff = 0;
3632     int plane;
3633     int max_plane = is_inter_block(xd->mi[0]) ? MAX_MB_PLANE : 1;
3634     for (plane = 0; plane < max_plane; ++plane) {
3635       x->plane[plane].eobs = ctx->eobs_pbuf[plane][1];
3636       has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
3637     }
3638 
3639     for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) {
3640       x->plane[plane].eobs = ctx->eobs_pbuf[plane][2];
3641       has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane);
3642     }
3643 
3644     best_mode_skippable |= !has_high_freq_coeff;
3645   }
3646 
3647   assert(best_mode_index >= 0);
3648 
3649   store_coding_context(x, ctx, best_mode_index, best_pred_diff,
3650                        best_filter_diff, best_mode_skippable);
3651 }
3652 
// Mode decision for a block whose segment has the SEG_LVL_SKIP feature active
// (asserted below). No mode search is run: the block is set up as ZEROMV from
// LAST_FRAME with x->skip set and a distortion of zero, so only signaling
// rate (interpolation filter, reference mode, reference frame) is accumulated.
//
// On success rd_cost receives the resulting rate, distortion and RD cost and
// the choice is recorded through store_coding_context(). When the RD cost is
// not below best_rd_so_far, rd_cost->rate is set to INT_MAX and
// rd_cost->rdcost to INT64_MAX, and the function returns before touching ctx.
void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data,
                                        MACROBLOCK *x, RD_COST *rd_cost,
                                        BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  const unsigned char seg_id = mi->segment_id;
  const int is_compound = 0;  // second reference is always NONE here
  const int64_t dist = 0;     // this path never accumulates distortion
  unsigned int single_ref_cost[MAX_REF_FRAMES];
  unsigned int comp_ref_cost[MAX_REF_FRAMES];
  int64_t pred_diff[REFERENCE_MODES];
  int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS];
  vpx_prob comp_mode_prob;
  INTERP_FILTER chosen_filter = SWITCHABLE;
  int64_t rd;
  int rate = 0;
  int idx;

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;

  estimate_ref_frame_costs(cm, xd, seg_id, single_ref_cost, comp_ref_cost,
                           &comp_mode_prob);

  // Reset x->pred_sse for every reference slot and x->pred_mv_sad for the
  // slots from LAST_FRAME upwards.
  for (idx = 0; idx < MAX_REF_FRAMES; ++idx) {
    x->pred_sse[idx] = INT_MAX;
    if (idx >= LAST_FRAME) x->pred_mv_sad[idx] = INT_MAX;
  }

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, seg_id, SEG_LVL_SKIP));

  // The mode is fixed: zero motion, single reference LAST_FRAME, skip set.
  x->skip = 1;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->mode = ZEROMV;
  mi->uv_mode = DC_PRED;
  mi->mv[0].as_int = 0;

  if (cm->interp_filter != BILINEAR) {
    // The per-filter rate comparison only runs for a frame-level SWITCHABLE
    // filter and a source variance at or above the speed-feature threshold;
    // otherwise EIGHTTAP is kept.
    const int search_filters =
        cm->interp_filter == SWITCHABLE &&
        x->source_variance >= cpi->sf.disable_filter_search_var_thresh;

    chosen_filter = EIGHTTAP;
    if (search_filters) {
      int lowest_rate = INT_MAX;
      for (idx = 0; idx < SWITCHABLE_FILTERS; ++idx) {
        int filter_rate;
        // The candidate is written into mi before costing it, since
        // vp9_get_switchable_rate() is only handed cpi and xd.
        mi->interp_filter = idx;
        filter_rate = vp9_get_switchable_rate(cpi, xd);
        if (filter_rate < lowest_rate) {
          lowest_rate = filter_rate;
          chosen_filter = mi->interp_filter;
        }
      }
    }
  }

  // Commit the filter; its signaling rate is only added when the frame-level
  // filter is SWITCHABLE.
  if (cm->interp_filter != SWITCHABLE) {
    mi->interp_filter = cm->interp_filter;
  } else {
    mi->interp_filter = chosen_filter;
    rate += vp9_get_switchable_rate(cpi, xd);
  }

  if (cm->reference_mode == REFERENCE_MODE_SELECT) {
    rate += vp9_cost_bit(comp_mode_prob, is_compound);
  }

  // Add the cost of signaling LAST_FRAME as the single reference.
  rate += single_ref_cost[LAST_FRAME];
  rd = RDCOST(x->rdmult, x->rddiv, rate, dist);

  rd_cost->dist = dist;
  if (rd >= best_rd_so_far) {
    // Not better than what the caller already has: report an invalid cost.
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }
  rd_cost->rate = rate;
  rd_cost->rdcost = rd;

  assert(cm->interp_filter == SWITCHABLE ||
         mi->interp_filter == cm->interp_filter);

  vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact,
                            cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV);

  // Only one candidate was evaluated, so all difference tables are zero.
  vp9_zero(pred_diff);
  vp9_zero(filter_diff);

  if (!x->select_tx_size) {
    swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE);
  }
  store_coding_context(x, ctx, THR_ZEROMV, pred_diff, filter_diff, 0);
}
3746 
vp9_rd_pick_inter_mode_sub8x8(VP9_COMP * cpi,TileDataEnc * tile_data,MACROBLOCK * x,int mi_row,int mi_col,RD_COST * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)3747 void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
3748                                    MACROBLOCK *x, int mi_row, int mi_col,
3749                                    RD_COST *rd_cost, BLOCK_SIZE bsize,
3750                                    PICK_MODE_CONTEXT *ctx,
3751                                    int64_t best_rd_so_far) {
3752   VP9_COMMON *const cm = &cpi->common;
3753   RD_OPT *const rd_opt = &cpi->rd;
3754   SPEED_FEATURES *const sf = &cpi->sf;
3755   MACROBLOCKD *const xd = &x->e_mbd;
3756   MODE_INFO *const mi = xd->mi[0];
3757   const struct segmentation *const seg = &cm->seg;
3758   MV_REFERENCE_FRAME ref_frame, second_ref_frame;
3759   unsigned char segment_id = mi->segment_id;
3760   int comp_pred, i;
3761   int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
3762   struct buf_2d yv12_mb[4][MAX_MB_PLANE];
3763   static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
3764                                     VP9_ALT_FLAG };
3765   int64_t best_rd = best_rd_so_far;
3766   int64_t best_yrd = best_rd_so_far;  // FIXME(rbultje) more precise
3767   int64_t best_pred_diff[REFERENCE_MODES];
3768   int64_t best_pred_rd[REFERENCE_MODES];
3769   int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS];
3770   int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS];
3771   MODE_INFO best_mbmode;
3772   int ref_index, best_ref_index = 0;
3773   unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES];
3774   vpx_prob comp_mode_p;
3775   INTERP_FILTER tmp_best_filter = SWITCHABLE;
3776   int rate_uv_intra, rate_uv_tokenonly;
3777   int64_t dist_uv;
3778   int skip_uv;
3779   PREDICTION_MODE mode_uv = DC_PRED;
3780   const int intra_cost_penalty = vp9_get_intra_cost_penalty(
3781       cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth);
3782   int_mv seg_mvs[4][MAX_REF_FRAMES];
3783   b_mode_info best_bmodes[4];
3784   int best_skip2 = 0;
3785   int ref_frame_skip_mask[2] = { 0 };
3786   int64_t mask_filter = 0;
3787   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
3788   int internal_active_edge =
3789       vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi);
3790 
3791   x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
3792   memset(x->zcoeff_blk[TX_4X4], 0, 4);
3793   vp9_zero(best_mbmode);
3794 
3795   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX;
3796 
3797   for (i = 0; i < 4; i++) {
3798     int j;
3799     for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV;
3800   }
3801 
3802   estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp,
3803                            &comp_mode_p);
3804 
3805   for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX;
3806   for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
3807     best_filter_rd[i] = INT64_MAX;
3808   rate_uv_intra = INT_MAX;
3809 
3810   rd_cost->rate = INT_MAX;
3811 
3812   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
3813     if (cpi->ref_frame_flags & flag_list[ref_frame]) {
3814       setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
3815                          frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
3816     } else {
3817       ref_frame_skip_mask[0] |= (1 << ref_frame);
3818       ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3819     }
3820     frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
3821     frame_mv[ZEROMV][ref_frame].as_int = 0;
3822   }
3823 
3824   for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) {
3825     int mode_excluded = 0;
3826     int64_t this_rd = INT64_MAX;
3827     int disable_skip = 0;
3828     int compmode_cost = 0;
3829     int rate2 = 0, rate_y = 0, rate_uv = 0;
3830     int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0;
3831     int skippable = 0;
3832     int i;
3833     int this_skip2 = 0;
3834     int64_t total_sse = INT_MAX;
3835     int early_term = 0;
3836     struct buf_2d backup_yv12[2][MAX_MB_PLANE];
3837 
3838     ref_frame = vp9_ref_order[ref_index].ref_frame[0];
3839     second_ref_frame = vp9_ref_order[ref_index].ref_frame[1];
3840 
3841 #if CONFIG_BETTER_HW_COMPATIBILITY
3842     // forbid 8X4 and 4X8 partitions if any reference frame is scaled.
3843     if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
3844       int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
3845       if (second_ref_frame > INTRA_FRAME)
3846         ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
3847       if (ref_scaled) continue;
3848     }
3849 #endif
3850     // Look at the reference frame of the best mode so far and set the
3851     // skip mask to look at a subset of the remaining modes.
3852     if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) {
3853       if (ref_index == 3) {
3854         switch (best_mbmode.ref_frame[0]) {
3855           case INTRA_FRAME: break;
3856           case LAST_FRAME:
3857             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME);
3858             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3859             break;
3860           case GOLDEN_FRAME:
3861             ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME);
3862             ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
3863             break;
3864           case ALTREF_FRAME:
3865             ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME);
3866             break;
3867           case NONE:
3868           case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break;
3869         }
3870       }
3871     }
3872 
3873     if ((ref_frame_skip_mask[0] & (1 << ref_frame)) &&
3874         (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame))))
3875       continue;
3876 
3877     // Test best rd so far against threshold for trying this mode.
3878     if (!internal_active_edge &&
3879         rd_less_than_thresh(best_rd,
3880                             rd_opt->threshes[segment_id][bsize][ref_index],
3881                             tile_data->thresh_freq_fact[bsize][ref_index]))
3882       continue;
3883 
3884     comp_pred = second_ref_frame > INTRA_FRAME;
3885     if (comp_pred) {
3886       if (!cpi->allow_comp_inter_inter) continue;
3887       if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
3888       // Do not allow compound prediction if the segment level reference frame
3889       // feature is in use as in this case there can only be one reference.
3890       if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue;
3891 
3892       if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) &&
3893           best_mbmode.ref_frame[0] == INTRA_FRAME)
3894         continue;
3895     }
3896 
3897     if (comp_pred)
3898       mode_excluded = cm->reference_mode == SINGLE_REFERENCE;
3899     else if (ref_frame != INTRA_FRAME)
3900       mode_excluded = cm->reference_mode == COMPOUND_REFERENCE;
3901 
3902     // If the segment reference frame feature is enabled....
3903     // then do nothing if the current ref frame is not allowed..
3904     if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
3905         get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
3906       continue;
3907       // Disable this drop out case if the ref frame
3908       // segment level feature is enabled for this segment. This is to
3909       // prevent the possibility that we end up unable to pick any mode.
3910     } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
3911       // Only consider ZEROMV/ALTREF_FRAME for alt ref frame,
3912       // unless ARNR filtering is enabled in which case we want
3913       // an unfiltered alternative. We allow near/nearest as well
3914       // because they may result in zero-zero MVs but be cheaper.
3915       if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0))
3916         continue;
3917     }
3918 
3919     mi->tx_size = TX_4X4;
3920     mi->uv_mode = DC_PRED;
3921     mi->ref_frame[0] = ref_frame;
3922     mi->ref_frame[1] = second_ref_frame;
3923     // Evaluate all sub-pel filters irrespective of whether we can use
3924     // them for this frame.
3925     mi->interp_filter =
3926         cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;
3927     x->skip = 0;
3928     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
3929 
3930     // Select prediction reference frames.
3931     for (i = 0; i < MAX_MB_PLANE; i++) {
3932       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
3933       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
3934     }
3935 
3936     if (ref_frame == INTRA_FRAME) {
3937       int rate;
3938       if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y,
3939                                        best_rd) >= best_rd)
3940         continue;
3941       rate2 += rate;
3942       rate2 += intra_cost_penalty;
3943       distortion2 += distortion_y;
3944 
3945       if (rate_uv_intra == INT_MAX) {
3946         choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra,
3947                              &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv);
3948       }
3949       rate2 += rate_uv_intra;
3950       rate_uv = rate_uv_tokenonly;
3951       distortion2 += dist_uv;
3952       distortion_uv = dist_uv;
3953       mi->uv_mode = mode_uv;
3954     } else {
3955       int rate;
3956       int64_t distortion;
3957       int64_t this_rd_thresh;
3958       int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX;
3959       int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX;
3960       int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse;
3961       int tmp_best_skippable = 0;
3962       int switchable_filter_index;
3963       int_mv *second_ref =
3964           comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL;
3965       b_mode_info tmp_best_bmodes[16];
3966       MODE_INFO tmp_best_mbmode;
3967       BEST_SEG_INFO bsi[SWITCHABLE_FILTERS];
3968       int pred_exists = 0;
3969       int uv_skippable;
3970 
3971       YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL };
3972       int ref;
3973 
3974       for (ref = 0; ref < 2; ++ref) {
3975         scaled_ref_frame[ref] =
3976             mi->ref_frame[ref] > INTRA_FRAME
3977                 ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref])
3978                 : NULL;
3979 
3980         if (scaled_ref_frame[ref]) {
3981           int i;
3982           // Swap out the reference frame for a version that's been scaled to
3983           // match the resolution of the current frame, allowing the existing
3984           // motion search code to be used without additional modifications.
3985           for (i = 0; i < MAX_MB_PLANE; i++)
3986             backup_yv12[ref][i] = xd->plane[i].pre[ref];
3987           vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col,
3988                                NULL);
3989         }
3990       }
3991 
3992       this_rd_thresh = (ref_frame == LAST_FRAME)
3993                            ? rd_opt->threshes[segment_id][bsize][THR_LAST]
3994                            : rd_opt->threshes[segment_id][bsize][THR_ALTR];
3995       this_rd_thresh = (ref_frame == GOLDEN_FRAME)
3996                            ? rd_opt->threshes[segment_id][bsize][THR_GOLD]
3997                            : this_rd_thresh;
3998       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
3999         filter_cache[i] = INT64_MAX;
4000 
4001       if (cm->interp_filter != BILINEAR) {
4002         tmp_best_filter = EIGHTTAP;
4003         if (x->source_variance < sf->disable_filter_search_var_thresh) {
4004           tmp_best_filter = EIGHTTAP;
4005         } else if (sf->adaptive_pred_interp_filter == 1 &&
4006                    ctx->pred_interp_filter < SWITCHABLE) {
4007           tmp_best_filter = ctx->pred_interp_filter;
4008         } else if (sf->adaptive_pred_interp_filter == 2) {
4009           tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE
4010                                 ? ctx->pred_interp_filter
4011                                 : 0;
4012         } else {
4013           for (switchable_filter_index = 0;
4014                switchable_filter_index < SWITCHABLE_FILTERS;
4015                ++switchable_filter_index) {
4016             int newbest, rs;
4017             int64_t rs_rd;
4018             MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext;
4019             mi->interp_filter = switchable_filter_index;
4020             tmp_rd = rd_pick_best_sub8x8_mode(
4021                 cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
4022                 &rate, &rate_y, &distortion, &skippable, &total_sse,
4023                 (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index,
4024                 mi_row, mi_col);
4025 
4026             if (tmp_rd == INT64_MAX) continue;
4027             rs = vp9_get_switchable_rate(cpi, xd);
4028             rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0);
4029             filter_cache[switchable_filter_index] = tmp_rd;
4030             filter_cache[SWITCHABLE_FILTERS] =
4031                 VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd);
4032             if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd;
4033 
4034             mask_filter = VPXMAX(mask_filter, tmp_rd);
4035 
4036             newbest = (tmp_rd < tmp_best_rd);
4037             if (newbest) {
4038               tmp_best_filter = mi->interp_filter;
4039               tmp_best_rd = tmp_rd;
4040             }
4041             if ((newbest && cm->interp_filter == SWITCHABLE) ||
4042                 (mi->interp_filter == cm->interp_filter &&
4043                  cm->interp_filter != SWITCHABLE)) {
4044               tmp_best_rdu = tmp_rd;
4045               tmp_best_rate = rate;
4046               tmp_best_ratey = rate_y;
4047               tmp_best_distortion = distortion;
4048               tmp_best_sse = total_sse;
4049               tmp_best_skippable = skippable;
4050               tmp_best_mbmode = *mi;
4051               for (i = 0; i < 4; i++) {
4052                 tmp_best_bmodes[i] = xd->mi[0]->bmi[i];
4053                 x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i];
4054               }
4055               pred_exists = 1;
4056               if (switchable_filter_index == 0 && sf->use_rd_breakout &&
4057                   best_rd < INT64_MAX) {
4058                 if (tmp_best_rdu / 2 > best_rd) {
4059                   // skip searching the other filters if the first is
4060                   // already substantially larger than the best so far
4061                   tmp_best_filter = mi->interp_filter;
4062                   tmp_best_rdu = INT64_MAX;
4063                   break;
4064                 }
4065               }
4066             }
4067           }  // switchable_filter_index loop
4068         }
4069       }
4070 
4071       if (tmp_best_rdu == INT64_MAX && pred_exists) continue;
4072 
4073       mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter
4074                                                            : cm->interp_filter);
4075       if (!pred_exists) {
4076         // Handles the special case when a filter that is not in the
4077         // switchable list (bilinear, 6-tap) is indicated at the frame level
4078         tmp_rd = rd_pick_best_sub8x8_mode(
4079             cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd,
4080             &rate, &rate_y, &distortion, &skippable, &total_sse,
4081             (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col);
4082         if (tmp_rd == INT64_MAX) continue;
4083       } else {
4084         total_sse = tmp_best_sse;
4085         rate = tmp_best_rate;
4086         rate_y = tmp_best_ratey;
4087         distortion = tmp_best_distortion;
4088         skippable = tmp_best_skippable;
4089         *mi = tmp_best_mbmode;
4090         for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i];
4091       }
4092 
4093       rate2 += rate;
4094       distortion2 += distortion;
4095 
4096       if (cm->interp_filter == SWITCHABLE)
4097         rate2 += vp9_get_switchable_rate(cpi, xd);
4098 
4099       if (!mode_excluded)
4100         mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE
4101                                   : cm->reference_mode == COMPOUND_REFERENCE;
4102 
4103       compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred);
4104 
4105       tmp_best_rdu =
4106           best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2),
4107                            RDCOST(x->rdmult, x->rddiv, 0, total_sse));
4108 
4109       if (tmp_best_rdu > 0) {
4110         // If even the 'Y' rd value of split is higher than best so far
4111         // then dont bother looking at UV
4112         vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8);
4113         memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm));
4114         if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable,
4115                               &uv_sse, BLOCK_8X8, tmp_best_rdu)) {
4116           for (ref = 0; ref < 2; ++ref) {
4117             if (scaled_ref_frame[ref]) {
4118               int i;
4119               for (i = 0; i < MAX_MB_PLANE; ++i)
4120                 xd->plane[i].pre[ref] = backup_yv12[ref][i];
4121             }
4122           }
4123           continue;
4124         }
4125 
4126         rate2 += rate_uv;
4127         distortion2 += distortion_uv;
4128         skippable = skippable && uv_skippable;
4129         total_sse += uv_sse;
4130       }
4131 
4132       for (ref = 0; ref < 2; ++ref) {
4133         if (scaled_ref_frame[ref]) {
4134           // Restore the prediction frame pointers to their unscaled versions.
4135           int i;
4136           for (i = 0; i < MAX_MB_PLANE; ++i)
4137             xd->plane[i].pre[ref] = backup_yv12[ref][i];
4138         }
4139       }
4140     }
4141 
4142     if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost;
4143 
4144     // Estimate the reference frame signaling cost and add it
4145     // to the rolling cost variable.
4146     if (second_ref_frame > INTRA_FRAME) {
4147       rate2 += ref_costs_comp[ref_frame];
4148     } else {
4149       rate2 += ref_costs_single[ref_frame];
4150     }
4151 
4152     if (!disable_skip) {
4153       const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd);
4154       const int skip_cost0 = vp9_cost_bit(skip_prob, 0);
4155       const int skip_cost1 = vp9_cost_bit(skip_prob, 1);
4156 
4157       // Skip is never coded at the segment level for sub8x8 blocks and instead
4158       // always coded in the bitstream at the mode info level.
4159       if (ref_frame != INTRA_FRAME && !xd->lossless) {
4160         if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0,
4161                    distortion2) <
4162             RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) {
4163           // Add in the cost of the no skip flag.
4164           rate2 += skip_cost0;
4165         } else {
4166           // FIXME(rbultje) make this work for splitmv also
4167           rate2 += skip_cost1;
4168           distortion2 = total_sse;
4169           assert(total_sse >= 0);
4170           rate2 -= (rate_y + rate_uv);
4171           rate_y = 0;
4172           rate_uv = 0;
4173           this_skip2 = 1;
4174         }
4175       } else {
4176         // Add in the cost of the no skip flag.
4177         rate2 += skip_cost0;
4178       }
4179 
4180       // Calculate the final RD estimate for this mode.
4181       this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
4182     }
4183 
4184     if (!disable_skip && ref_frame == INTRA_FRAME) {
4185       for (i = 0; i < REFERENCE_MODES; ++i)
4186         best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd);
4187       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++)
4188         best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd);
4189     }
4190 
4191     // Did this mode help.. i.e. is it the new best mode
4192     if (this_rd < best_rd || x->skip) {
4193       if (!mode_excluded) {
4194         int max_plane = MAX_MB_PLANE;
4195         // Note index of best mode so far
4196         best_ref_index = ref_index;
4197 
4198         if (ref_frame == INTRA_FRAME) {
4199           /* required for left and above block mv */
4200           mi->mv[0].as_int = 0;
4201           max_plane = 1;
4202           // Initialize interp_filter here so we do not have to check for
4203           // inter block modes in get_pred_context_switchable_interp()
4204           mi->interp_filter = SWITCHABLE_FILTERS;
4205         }
4206 
4207         rd_cost->rate = rate2;
4208         rd_cost->dist = distortion2;
4209         rd_cost->rdcost = this_rd;
4210         best_rd = this_rd;
4211         best_yrd =
4212             best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv);
4213         best_mbmode = *mi;
4214         best_skip2 = this_skip2;
4215         if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
4216         memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
4217                sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
4218 
4219         for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];
4220 
4221         // TODO(debargha): enhance this test with a better distortion prediction
4222         // based on qp, activity mask and history
4223         if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) &&
4224             (ref_index > MIN_EARLY_TERM_INDEX)) {
4225           int qstep = xd->plane[0].dequant[1];
4226           // TODO(debargha): Enhance this by specializing for each mode_index
4227           int scale = 4;
4228 #if CONFIG_VP9_HIGHBITDEPTH
4229           if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
4230             qstep >>= (xd->bd - 8);
4231           }
4232 #endif  // CONFIG_VP9_HIGHBITDEPTH
4233           if (x->source_variance < UINT_MAX) {
4234             const int var_adjust = (x->source_variance < 16);
4235             scale -= var_adjust;
4236           }
4237           if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) {
4238             early_term = 1;
4239           }
4240         }
4241       }
4242     }
4243 
4244     /* keep record of best compound/single-only prediction */
4245     if (!disable_skip && ref_frame != INTRA_FRAME) {
4246       int64_t single_rd, hybrid_rd, single_rate, hybrid_rate;
4247 
4248       if (cm->reference_mode == REFERENCE_MODE_SELECT) {
4249         single_rate = rate2 - compmode_cost;
4250         hybrid_rate = rate2;
4251       } else {
4252         single_rate = rate2;
4253         hybrid_rate = rate2 + compmode_cost;
4254       }
4255 
4256       single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2);
4257       hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2);
4258 
4259       if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE])
4260         best_pred_rd[SINGLE_REFERENCE] = single_rd;
4261       else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE])
4262         best_pred_rd[COMPOUND_REFERENCE] = single_rd;
4263 
4264       if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT])
4265         best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
4266     }
4267 
4268     /* keep record of best filter type */
4269     if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME &&
4270         cm->interp_filter != BILINEAR) {
4271       int64_t ref =
4272           filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS
4273                                                        : cm->interp_filter];
4274       int64_t adj_rd;
4275       for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4276         if (ref == INT64_MAX)
4277           adj_rd = 0;
4278         else if (filter_cache[i] == INT64_MAX)
4279           // when early termination is triggered, the encoder does not have
4280           // access to the rate-distortion cost. it only knows that the cost
4281           // should be above the maximum valid value. hence it takes the known
4282           // maximum plus an arbitrary constant as the rate-distortion cost.
4283           adj_rd = mask_filter - ref + 10;
4284         else
4285           adj_rd = filter_cache[i] - ref;
4286 
4287         adj_rd += this_rd;
4288         best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd);
4289       }
4290     }
4291 
4292     if (early_term) break;
4293 
4294     if (x->skip && !comp_pred) break;
4295   }
4296 
4297   if (best_rd >= best_rd_so_far) {
4298     rd_cost->rate = INT_MAX;
4299     rd_cost->rdcost = INT64_MAX;
4300     return;
4301   }
4302 
4303   // If we used an estimate for the uv intra rd in the loop above...
4304   if (sf->use_uv_intra_rd_estimate) {
4305     // Do Intra UV best rd mode selection if best mode choice above was intra.
4306     if (best_mbmode.ref_frame[0] == INTRA_FRAME) {
4307       *mi = best_mbmode;
4308       rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly,
4309                               &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4);
4310     }
4311   }
4312 
4313   if (best_rd == INT64_MAX) {
4314     rd_cost->rate = INT_MAX;
4315     rd_cost->dist = INT64_MAX;
4316     rd_cost->rdcost = INT64_MAX;
4317     return;
4318   }
4319 
4320   assert((cm->interp_filter == SWITCHABLE) ||
4321          (cm->interp_filter == best_mbmode.interp_filter) ||
4322          !is_inter_block(&best_mbmode));
4323 
4324   vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh,
4325                             bsize, best_ref_index);
4326 
4327   // macroblock modes
4328   *mi = best_mbmode;
4329   x->skip |= best_skip2;
4330   if (!is_inter_block(&best_mbmode)) {
4331     for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode;
4332   } else {
4333     for (i = 0; i < 4; ++i)
4334       memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info));
4335 
4336     mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int;
4337     mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int;
4338   }
4339 
4340   for (i = 0; i < REFERENCE_MODES; ++i) {
4341     if (best_pred_rd[i] == INT64_MAX)
4342       best_pred_diff[i] = INT_MIN;
4343     else
4344       best_pred_diff[i] = best_rd - best_pred_rd[i];
4345   }
4346 
4347   if (!x->skip) {
4348     for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
4349       if (best_filter_rd[i] == INT64_MAX)
4350         best_filter_diff[i] = 0;
4351       else
4352         best_filter_diff[i] = best_rd - best_filter_rd[i];
4353     }
4354     if (cm->interp_filter == SWITCHABLE)
4355       assert(best_filter_diff[SWITCHABLE_FILTERS] == 0);
4356   } else {
4357     vp9_zero(best_filter_diff);
4358   }
4359 
4360   store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff,
4361                        0);
4362 }
4363