1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdio.h>
15 
16 #include "config/av1_rtcd.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/bitops.h"
21 #include "aom_ports/mem.h"
22 
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/mvref_common.h"
27 #include "av1/common/pred_common.h"
28 #include "av1/common/quant_common.h"
29 #include "av1/common/reconinter.h"
30 #include "av1/common/reconintra.h"
31 #include "av1/common/seg_common.h"
32 
33 #include "av1/encoder/av1_quantize.h"
34 #include "av1/encoder/cost.h"
35 #include "av1/encoder/encodemb.h"
36 #include "av1/encoder/encodemv.h"
37 #include "av1/encoder/encoder.h"
38 #include "av1/encoder/encodetxb.h"
39 #include "av1/encoder/mcomp.h"
40 #include "av1/encoder/ratectrl.h"
41 #include "av1/encoder/rd.h"
42 #include "av1/encoder/tokenize.h"
43 
// Exponent applied to the bit-depth-normalized DC quantizer value in
// compute_rd_thresh_factor() when deriving the rd threshold factor.
#define RD_THRESH_POW 1.25
45 
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
// The table is indexed by block size; set_block_thresholds() multiplies the
// quantizer-derived factor by the entry and later divides the product by 4,
// which removes the << 2 scaling.
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
};
53 
// Flags indexed by [intra ext-tx set index][tx size index]. A non-zero entry
// makes av1_fill_mode_rates() fill the intra transform-type cost table for
// that (set, size) pair. Row 0 is not read there: its set loop starts at 1.
static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                            };
60 
// Flags indexed by [inter ext-tx set index][tx size index]. A non-zero entry
// makes av1_fill_mode_rates() fill the inter transform-type cost table for
// that (set, size) pair. Row 0 is not read there: its set loop starts at 1.
static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
                                            [EXT_TX_SIZES] = {
                                              { 1, 1, 1, 1 },  // unused
                                              { 1, 1, 0, 0 },
                                              { 0, 0, 1, 0 },
                                              { 0, 1, 1, 1 },
                                            };
68 
// Maps an ext-tx set index to an EXT_TX_SET_* value. The first index selects
// the prediction class (0 = intra, 1 = inter), the second is the set index.
// av1_fill_mode_rates() uses the result to index av1_ext_tx_inv, which it
// passes as the last argument of av1_cost_tokens_from_cdf().
static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
                                                      EXT_TX_SETS_INTER)] = {
  {
      // Intra
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_DTT4_IDTX_1DDCT,
      EXT_TX_SET_DTT4_IDTX,
  },
  {
      // Inter
      EXT_TX_SET_DCTONLY,
      EXT_TX_SET_ALL16,
      EXT_TX_SET_DTT9_IDTX_1DDCT,
      EXT_TX_SET_DCT_IDTX,
  },
};
85 
// Fills the tables of `mode_costs` by running av1_cost_tokens_from_cdf() on
// the matching CDFs stored in `fc`.
//
// cm:         read for the skip-mode flag, the filter-intra block-size check
//             and the intra-only test that gates the inter-frame tables.
// mode_costs: destination cost tables (written).
// fc:         frame context supplying the CDFs (read).
//
// Tables that are only filled conditionally (skip mode, filter intra per
// block size, ext-tx per set/size, wedge index, everything inter-related on
// intra-only frames) are left untouched when their condition is false.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  // Partition.
  for (int ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[ctx],
                             fc->partition_cdf[ctx], NULL);
  }

  // Skip mode, only when the current frame enables it.
  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (int ctx = 0; ctx < SKIP_MODE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[ctx],
                               fc->skip_mode_cdfs[ctx], NULL);
    }
  }

  // Transform skip.
  for (int ctx = 0; ctx < SKIP_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[ctx],
                             fc->skip_txfm_cdfs[ctx], NULL);
  }

  // Luma intra modes: key-frame tables (two contexts) and per-size-group
  // tables.
  for (int ctx0 = 0; ctx0 < KF_MODE_CONTEXTS; ++ctx0) {
    for (int ctx1 = 0; ctx1 < KF_MODE_CONTEXTS; ++ctx1) {
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[ctx0][ctx1],
                               fc->kf_y_cdf[ctx0][ctx1], NULL);
    }
  }
  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[group],
                             fc->y_mode_cdf[group], NULL);
  }

  // Chroma intra modes.
  for (int cfl_type = 0; cfl_type < CFL_ALLOWED_TYPES; ++cfl_type) {
    for (int mode = 0; mode < INTRA_MODES; ++mode) {
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[cfl_type][mode],
                               fc->uv_mode_cdf[cfl_type][mode], NULL);
    }
  }

  // Filter intra: mode table, then the per-block-size tables for the sizes
  // accepted by av1_filter_intra_allowed_bsize().
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (!av1_filter_intra_allowed_bsize(cm, bsize)) continue;
    av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[bsize],
                             fc->filter_intra_cdfs[bsize], NULL);
  }

  // Switchable interpolation filter.
  for (int ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[ctx],
                             fc->switchable_interp_cdf[ctx], NULL);
  }

  // Palette: sizes and luma mode.
  for (int bctx = 0; bctx < PALATTE_BSIZE_CTXS; ++bctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[bctx],
                             fc->palette_y_size_cdf[bctx], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[bctx],
                             fc->palette_uv_size_cdf[bctx], NULL);
    for (int mctx = 0; mctx < PALETTE_Y_MODE_CONTEXTS; ++mctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[bctx][mctx],
                               fc->palette_y_mode_cdf[bctx][mctx], NULL);
    }
  }

  // Palette: chroma mode.
  for (int ctx = 0; ctx < PALETTE_UV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[ctx],
                             fc->palette_uv_mode_cdf[ctx], NULL);
  }

  // Palette: color indices.
  for (int size = 0; size < PALETTE_SIZES; ++size) {
    for (int ctx = 0; ctx < PALETTE_COLOR_INDEX_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[size][ctx],
                               fc->palette_y_color_index_cdf[size][ctx], NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[size][ctx],
                               fc->palette_uv_color_index_cdf[size][ctx],
                               NULL);
    }
  }

  // CfL: per joint sign, the U and V alpha costs. A plane whose sign is
  // CFL_SIGN_ZERO gets an all-zero table. The joint-sign cost itself is
  // folded into the U table only.
  int joint_sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(joint_sign_cost, fc->cfl_sign_cdf, NULL);
  for (int js = 0; js < CFL_JOINT_SIGNS; js++) {
    int *const u_costs = mode_costs->cfl_cost[js][CFL_PRED_U];
    int *const v_costs = mode_costs->cfl_cost[js][CFL_PRED_V];

    if (CFL_SIGN_U(js) != CFL_SIGN_ZERO) {
      av1_cost_tokens_from_cdf(u_costs, fc->cfl_alpha_cdf[CFL_CONTEXT_U(js)],
                               NULL);
    } else {
      memset(u_costs, 0, CFL_ALPHABET_SIZE * sizeof(*u_costs));
    }

    if (CFL_SIGN_V(js) != CFL_SIGN_ZERO) {
      av1_cost_tokens_from_cdf(v_costs, fc->cfl_alpha_cdf[CFL_CONTEXT_V(js)],
                               NULL);
    } else {
      memset(v_costs, 0, CFL_ALPHABET_SIZE * sizeof(*v_costs));
    }

    for (int k = 0; k < CFL_ALPHABET_SIZE; k++) {
      u_costs[k] += joint_sign_cost[js];
    }
  }

  // Transform size and transform partition.
  for (int cat = 0; cat < MAX_TX_CATS; ++cat) {
    for (int ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[cat][ctx],
                               fc->tx_size_cdf[cat][ctx], NULL);
    }
  }
  for (int ctx = 0; ctx < TXFM_PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[ctx],
                             fc->txfm_partition_cdf[ctx], NULL);
  }

  // Transform type: only the (set, size) pairs flagged in the
  // use_*_ext_tx_for_txsize tables; set 0 is skipped by both loops.
  for (int tx = TX_4X4; tx < EXT_TX_SIZES; ++tx) {
    for (int set = 1; set < EXT_TX_SETS_INTER; ++set) {
      if (!use_inter_ext_tx_for_txsize[set][tx]) continue;
      av1_cost_tokens_from_cdf(
          mode_costs->inter_tx_type_costs[set][tx],
          fc->inter_ext_tx_cdf[set][tx],
          av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][set]]);
    }
    for (int set = 1; set < EXT_TX_SETS_INTRA; ++set) {
      if (!use_intra_ext_tx_for_txsize[set][tx]) continue;
      for (int mode = 0; mode < INTRA_MODES; ++mode) {
        av1_cost_tokens_from_cdf(
            mode_costs->intra_tx_type_costs[set][tx][mode],
            fc->intra_ext_tx_cdf[set][tx][mode],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][set]]);
      }
    }
  }

  // Angle delta and intra block copy.
  for (int dir = 0; dir < DIRECTIONAL_MODES; ++dir) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[dir],
                             fc->angle_delta_cdf[dir], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  // Everything below is inter-only.
  if (frame_is_intra_only(cm)) return;

  // Reference selection.
  for (int ctx = 0; ctx < COMP_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[ctx],
                             fc->comp_inter_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < SINGLE_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[ctx][ref],
                               fc->single_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < COMP_REF_TYPE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[ctx],
                             fc->comp_ref_type_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < UNI_COMP_REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < UNIDIR_COMP_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[ctx][ref],
                               fc->uni_comp_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < FWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[ctx][ref],
                               fc->comp_ref_cdf[ctx][ref], NULL);
    }
  }
  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < BWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[ctx][ref],
                               fc->comp_bwdref_cdf[ctx][ref], NULL);
    }
  }

  // Intra/inter decision and single-reference inter modes.
  for (int ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[ctx],
                             fc->intra_inter_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < NEWMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[ctx],
                             fc->newmv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < GLOBALMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[ctx],
                             fc->zeromv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < REFMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[ctx],
                             fc->refmv_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < DRL_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[ctx], fc->drl_cdf[ctx],
                             NULL);
  }

  // Compound modes and compound types.
  for (int ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[ctx],
                             fc->inter_compound_mode_cdf[ctx], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[bsize],
                             fc->compound_type_cdf[bsize], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (!av1_is_wedge_used(bsize)) continue;
    av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[bsize],
                             fc->wedge_idx_cdf[bsize], NULL);
  }

  // Inter-intra.
  for (int group = 0; group < BLOCK_SIZE_GROUPS; ++group) {
    av1_cost_tokens_from_cdf(mode_costs->interintra_cost[group],
                             fc->interintra_cdf[group], NULL);
    av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[group],
                             fc->interintra_mode_cdf[group], NULL);
  }
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[bsize],
                             fc->wedge_interintra_cdf[bsize], NULL);
  }

  // Motion mode tables, starting at BLOCK_8X8.
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[bsize],
                             fc->motion_mode_cdf[bsize], NULL);
  }
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; bsize++) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[bsize],
                             fc->obmc_cdf[bsize], NULL);
  }

  // Compound index and compound group index.
  for (int ctx = 0; ctx < COMP_INDEX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[ctx],
                             fc->compound_index_cdf[ctx], NULL);
  }
  for (int ctx = 0; ctx < COMP_GROUP_IDX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[ctx],
                             fc->comp_group_idx_cdf[ctx], NULL);
  }
}
316 
// Fills the three loop-restoration cost tables of `mode_costs` (switchable,
// Wiener, sgrproj) from the corresponding CDFs in `fc`.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  const struct {
    int *costs;
    const aom_cdf_prob *cdf;
  } restore_tables[] = {
    { mode_costs->switchable_restore_cost, fc->switchable_restore_cdf },
    { mode_costs->wiener_restore_cost, fc->wiener_restore_cdf },
    { mode_costs->sgrproj_restore_cost, fc->sgrproj_restore_cdf },
  };
  const size_t num_tables = sizeof(restore_tables) / sizeof(restore_tables[0]);

  for (size_t k = 0; k < num_tables; ++k) {
    av1_cost_tokens_from_cdf(restore_tables[k].costs, restore_tables[k].cdf,
                             NULL);
  }
}
325 
// Values are now correlated to quantizer.
// One table per bit depth (8, 10, 12), each indexed by qindex. They are
// filled by av1_init_me_luts() and read by av1_set_sad_per_bit().
static int sad_per_bit_lut_8[QINDEX_RANGE];
static int sad_per_bit_lut_10[QINDEX_RANGE];
static int sad_per_bit_lut_12[QINDEX_RANGE];
330 
// Fills bit16lut[0 .. range-1] for one bit depth. Entry `qindex` is a linear
// function of av1_convert_qindex_to_q(qindex, bit_depth), truncated to int.
// The table is computed by formula rather than hard-coded so that
// experimental changes to the quantizer tables carry through automatically.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  for (int qindex = 0; qindex < range; ++qindex) {
    const double q = av1_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
342 
av1_init_me_luts(void)343 void av1_init_me_luts(void) {
344   init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
345   init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
346   init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
347 }
348 
// Fixed-point factors used by av1_compute_rd_mult(); both are applied as
// (rdmult * factor) >> 7, i.e. 128 corresponds to x1.0.

// ARF boost adjustment. Indexed by min(15, gfu_boost / 100); the scaled value
// is added on top of rdmult.
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };

// Layer depth adjustment. Indexed by min(layer_depth, 6); the scaled value
// replaces rdmult.
static const int rd_layer_depth_factor[7] = {
  160, 160, 160, 160, 192, 208, 224
};
355 
// Default rd multiplier for inter frames: a linear function of qindex.
// The coefficients are a first-pass estimate taken from an earlier Vizer run.
static double def_inter_rd_multiplier(int qindex) {
  const double base = 3.2;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
362 
// Default rd multiplier for ARF/Golden frames: a linear function of qindex.
// The coefficients are a first-pass estimate taken from an earlier Vizer run.
static double def_arf_rd_multiplier(int qindex) {
  const double base = 3.25;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
369 
// Default rd multiplier for key frames: a linear function of qindex.
// The coefficients are a first-pass estimate taken from an earlier Vizer run.
static double def_kf_rd_multiplier(int qindex) {
  const double base = 3.3;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
376 
// Computes the base rd multiplier for a quantizer index.
//
// The DC quantizer value for `qindex` is squared and scaled by a
// frame-update-type dependent multiplier (key frame, GF/ARF, or other). For
// 10-bit and 12-bit content the result is then rounded down by 4 and 8 bits
// respectively.
//
// Returns a value >= 1, or -1 (after asserting) for an unknown bit depth.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int dc_q = av1_dc_quant_QTX(qindex, 0, bit_depth);

  // Pick the multiplier that matches the frame update type.
  double q_mult;
  if (update_type == KF_UPDATE) {
    q_mult = def_kf_rd_multiplier(qindex);
  } else if (update_type == GF_UPDATE || update_type == ARF_UPDATE) {
    q_mult = def_arf_rd_multiplier(qindex);
  } else {
    q_mult = def_inter_rd_multiplier(qindex);
  }

  const int dc_q_squared = dc_q * dc_q;
  int rdmult = (int)((double)dc_q_squared * q_mult);

  // Normalize high bit depth results.
  if (bit_depth == AOM_BITS_10) {
    rdmult = ROUND_POWER_OF_TWO(rdmult, 4);
  } else if (bit_depth == AOM_BITS_12) {
    rdmult = ROUND_POWER_OF_TWO(rdmult, 8);
  } else if (bit_depth != AOM_BITS_8) {
    assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
    return -1;
  }

  // Never return a multiplier below 1.
  return AOMMAX(rdmult, 1);
}
403 
av1_compute_rd_mult(const AV1_COMP * cpi,int qindex)404 int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
405   const aom_bit_depth_t bit_depth = cpi->common.seq_params->bit_depth;
406   const FRAME_UPDATE_TYPE update_type =
407       cpi->ppi->gf_group.update_type[cpi->gf_frame_index];
408   int64_t rdmult =
409       av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
410   if (is_stat_consumption_stage(cpi) && !cpi->oxcf.q_cfg.use_fixed_qp_offsets &&
411       (cpi->common.current_frame.frame_type != KEY_FRAME)) {
412     const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
413     const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
414     const int layer_depth =
415         AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
416 
417     // Layer depth adjustment
418     rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
419 
420     // ARF boost adjustment
421     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
422   }
423   return (int)rdmult;
424 }
425 
// Returns the qindex offset (relative to `qindex`) whose DC quantizer value
// approximates q / sqrt(beta), where q is the DC quantizer value at `qindex`.
//
// The search walks qindex one step at a time, downwards (not below 0) when
// the target is smaller than q and upwards (not above MAXQ) when it is
// larger, and stops at the first index whose quantizer reaches or passes the
// target. Returns 0 when the rounded target equals q. `beta` must be > 0.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int start_qindex = qindex;
  const int start_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(start_q / sqrt(beta));

  if (target_q == start_q) return 0;

  const int search_down = target_q < start_q;
  while (search_down ? (qindex > 0) : (qindex < MAXQ)) {
    qindex += search_down ? -1 : 1;
    const int candidate_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
    const int reached =
        search_down ? (target_q >= candidate_q) : (target_q <= candidate_q);
    if (reached) break;
  }
  return qindex - start_qindex;
}
453 
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)454 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
455                                   int curr_qindex) {
456   curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
457   const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
458   const int deltaq_deadzone = delta_q_res / 4;
459   const int qmask = ~(delta_q_res - 1);
460   int abs_deltaq_index = abs(curr_qindex - prev_qindex);
461   abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
462   int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
463   adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
464   return adjust_qindex;
465 }
466 
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)467 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
468   assert(beta > 0.0);
469   const AV1_COMMON *cm = &cpi->common;
470   int q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
471                            cm->seq_params->bit_depth);
472 
473   return (int)(av1_compute_rd_mult(cpi, q) / beta);
474 }
475 
// Returns the rd threshold factor for `qindex`.
//
// The DC quantizer value is divided by a bit-depth dependent constant
// (4 for 8-bit, 16 for 10-bit, 64 for 12-bit), raised to RD_THRESH_POW and
// scaled by 5.12; the result is truncated to int and floored at 8.
// Returns -1 (after asserting) for an unknown bit depth.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double bd_divisor;
  if (bit_depth == AOM_BITS_8) {
    bd_divisor = 4.0;
  } else if (bit_depth == AOM_BITS_10) {
    bd_divisor = 16.0;
  } else if (bit_depth == AOM_BITS_12) {
    bd_divisor = 64.0;
  } else {
    assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
    return -1;
  }

  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / bd_divisor;
  // TODO(debargha): Adjust the function below.
  const int factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return AOMMAX(factor, 8);
}
493 
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)494 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
495   switch (cpi->common.seq_params->bit_depth) {
496     case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
497     case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
498     case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
499     default:
500       assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
501   }
502 }
503 
// Fills rd->threshes[segment][block size][mode] from rd->thresh_mult[mode].
//
// For each segment, the segment qindex (plus y_dc_delta_q, clamped to
// [0, MAXQ]) is turned into a threshold factor. That factor is multiplied by
// the block-size factor, and each mode threshold becomes
// thresh_mult * factor / 4. Multipliers at or above INT_MAX / factor
// saturate to INT_MAX instead of overflowing.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
  for (int seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex =
        av1_get_qindex(&cm->seg, seg, cm->quant_params.base_qindex);
    const int qindex =
        clamp(seg_qindex + cm->quant_params.y_dc_delta_q, 0, MAXQ);
    const int q_factor =
        compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bsize];
      const int mult_limit = INT_MAX / scale;

      for (int mode = 0; mode < MAX_MODES; ++mode) {
        if (rd->thresh_mult[mode] < mult_limit) {
          rd->threshes[seg][bsize][mode] = rd->thresh_mult[mode] * scale / 4;
        } else {
          rd->threshes[seg][bsize][mode] = INT_MAX;
        }
      }
    }
  }
}
527 
// Fills the coefficient cost tables in `coeff_costs` from the CDFs in `fc`.
//
// Only the first min(num_planes, PLANE_TYPES) plane types are processed.
// Two groups of tables are written:
//  - eob_costs[eob size class][plane]: costs of the EOB flag for each of the
//    7 EOB size classes (CDFs eob_flag_cdf16 .. eob_flag_cdf1024).
//  - coeff_costs[tx size][plane]: txb skip, base level (plus derived
//    entries 4..7), EOB extra bits, DC sign and the base-range ("lps") costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  // EOB flag costs.
  for (int eob_class = 0; eob_class < 7; ++eob_class) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *const eob_out =
          &coeff_costs->eob_costs[eob_class][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *eob_cdf;
        switch (eob_class) {
          case 0: eob_cdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: eob_cdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: eob_cdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: eob_cdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: eob_cdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: eob_cdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: eob_cdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(eob_out->eob_cost[ctx], eob_cdf, NULL);
      }
    }
  }

  // Per transform size / plane coefficient costs.
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *const out = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(out->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        int *const base = out->base_cost[ctx];
        av1_cost_tokens_from_cdf(base, fc->coeff_base_cdf[tx_size][plane][ctx],
                                 NULL);

        // Entries 4..7 are derived from entries 0..3: entry 4 is zero and
        // entries 5..7 hold the difference between consecutive symbol costs
        // (entry 5 additionally includes the cost of one literal bit).
        base[4] = 0;
        base[5] = base[1] + av1_cost_literal(1) - base[0];
        base[6] = base[2] - base[1];
        base[7] = base[3] - base[2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int *const lps = out->lps_cost[ctx];
        int br_rate[BR_CDF_SIZE];
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);

        // Cumulative base-range costs: each group of BR_CDF_SIZE - 1 levels
        // costs the accumulated price of the earlier groups plus the cost of
        // its own symbol. Moving on to the next group adds the cost of the
        // last symbol, br_rate[BR_CDF_SIZE - 1].
        int accumulated = 0;
        int level = 0;
        for (; level < COEFF_BASE_RANGE; level += BR_CDF_SIZE - 1) {
          for (int sym = 0; sym < BR_CDF_SIZE - 1; ++sym) {
            lps[level + sym] = accumulated + br_rate[sym];
          }
          accumulated += br_rate[BR_CDF_SIZE - 1];
        }
        lps[level] = accumulated;

        // Second half of the row: entry 0 is copied, the remaining entries
        // are the increments between consecutive cumulative costs.
        lps[0 + COEFF_BASE_RANGE + 1] = lps[0];
        for (int k = 1; k <= COEFF_BASE_RANGE; ++k) {
          lps[k + COEFF_BASE_RANGE + 1] = lps[k] - lps[k - 1];
        }
      }
    }
  }
}
620 
// Sets up the motion vector cost pointers in `mv_costs` and builds the cost
// tables from `nmvc`.
//
// nmv_cost[] and nmv_cost_hp[] are pointed at element MV_MAX of their backing
// arrays. mv_cost_stack selects the high-precision tables only when
// `integer_mv` is zero and `usehp` is non-zero. The tables are built with
// precision MV_SUBPEL_NONE for integer MVs and `usehp` otherwise.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  const int use_hp_tables = !integer_mv && usehp;
  mv_costs->mv_cost_stack =
      use_hp_tables ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;

  av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                           nmvc, integer_mv ? MV_SUBPEL_NONE : usehp);
}
638 
// Sets up the intra block copy displacement-vector cost pointers in
// `dv_costs` (each pointing at element MV_MAX of its backing array) and
// builds the cost tables from `ndvc` with precision MV_SUBPEL_NONE.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  for (int comp = 0; comp < 2; ++comp) {
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  }
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
645 
// Initializes the per-frame rate-distortion constants of `cpi`.
//
// Sets rd->RDMULT (optionally overridden by an RD command when
// CONFIG_RD_COMMAND is enabled), derives the error-per-bit value and the
// block thresholds, and refreshes the MV, coefficient and mode cost tables.
//
// A cost table is refreshed when any of the following holds:
//  - the frame is intra-only, or (frame_number & 7) == 1;
//  - its update frequency is COST_UPD_TILE;
//  - non-rd pick mode is off and its update frequency is not COST_UPD_OFF.
// DV costs are filled only when non-rd pick mode is off, intra block copy is
// allowed and the encoder is not in the stat-generation stage.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  RD_OPT *const rd = &cpi->rd;
  MvCosts *mv_costs = x->mv_costs;
  const CostUpdateFreq cost_upd_freq = cpi->oxcf.cost_upd_freq;
  const int rd_pick_mode = !cpi->sf.rt_sf.use_nonrd_pick_mode;
  const int periodic_fill =
      frame_is_intra_only(cm) || (cm->current_frame.frame_number & 0x07) == 1;
  const int num_planes = av1_num_planes(cm);

  rd->RDMULT = av1_compute_rd_mult(
      cpi, cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q);
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd);

  const int fill_mv = (rd_pick_mode && cost_upd_freq.mv != COST_UPD_OFF) ||
                      cost_upd_freq.mv == COST_UPD_TILE || periodic_fill;
  if (fill_mv) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, mv_costs);
  }

  const int fill_coeff =
      (rd_pick_mode && cost_upd_freq.coeff != COST_UPD_OFF) ||
      cost_upd_freq.coeff == COST_UPD_TILE || periodic_fill;
  if (fill_coeff) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, num_planes);
  }

  const int fill_mode = (rd_pick_mode && cost_upd_freq.mode != COST_UPD_OFF) ||
                        cost_upd_freq.mode == COST_UPD_TILE || periodic_fill;
  if (fill_mode) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  if (rd_pick_mode && av1_allow_intrabc(cm) &&
      !is_stat_generation_stage(cpi)) {
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
691 
// Looks up the normalized rate and normalized distortion (both in Q10) for a
// given normalized squared step size xsq_q10, linearly interpolating between
// neighbouring table samples.
//
// xsq_q10: Q10 value used to select the table segment. It must be small
//          enough that the selected index plus one stays inside the tables;
//          av1_model_rd_from_var_lapndz() clamps it before calling.
// r_q10:   receives the interpolated entry of rate_tab_q10.
// d_q10:   receives the interpolated entry of dist_tab_q10.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The three tables below must all have the same number of entries.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // Models the rate for a Laplacian source with given variance when
  // quantized with a uniform quantizer with given stepsize. The closed form
  // expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // Models the normalized distortion for a Laplacian source with given
  // variance when quantized with a uniform quantizer with given stepsize.
  // The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // Value of xsq_q10 at which each table entry above was sampled. Entries
  // come in groups of eight; the spacing doubles from one group to the next.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };

  // Pick the table segment: the group comes from the position of the top bit
  // of the biased input, the slot within the group from the next three bits.
  const int biased = (xsq_q10 >> 2) + 8;
  const int group = get_msb(biased) - 3;
  const int idx = (group << 3) + ((biased >> group) & 0x7);

  // Q10 position of the input between sample idx and sample idx + 1. The
  // shift by (2 + group) divides by the spacing of the samples in this group.
  const int frac_q10 = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + group);
  const int rest_q10 = (1 << 10) - frac_q10;

  *r_q10 =
      (rate_tab_q10[idx] * rest_q10 + rate_tab_q10[idx + 1] * frac_q10) >> 10;
  *d_q10 =
      (dist_tab_q10[idx] * rest_q10 + dist_tab_q10[idx + 1] * frac_q10) >> 10;
}
757 
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)758 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
759                                   unsigned int qstep, int *rate,
760                                   int64_t *dist) {
761   // This function models the rate and distortion for a Laplacian
762   // source with given variance when quantized with a uniform quantizer
763   // with given stepsize. The closed form expressions are in:
764   // Hang and Chen, "Source Model for transform video coder and its
765   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
766   // Sys. for Video Tech., April 1997.
767   if (var == 0) {
768     *rate = 0;
769     *dist = 0;
770   } else {
771     int d_q10, r_q10;
772     static const uint32_t MAX_XSQ_Q10 = 245727;
773     const uint64_t xsq_q10_64 =
774         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
775     const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
776     model_rd_norm(xsq_q10, &r_q10, &d_q10);
777     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
778     *dist = (var * (int64_t)d_q10 + 512) >> 10;
779   }
780 }
781 
// Cubic (Catmull-Rom form) interpolation through four consecutive samples
// p[0..3]. The result equals p[1] at x = 0 and p[2] at x = 1; x is the
// fractional position between those two samples.
static double interp_cubic(const double *p, double x) {
  // Coefficients of the nested polynomial, highest order first.
  const double cubic = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  const double quadratic = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3];
  const double linear = p[2] - p[0];
  return p[1] + 0.5 * x * (linear + x * (quadratic + x * cubic));
}
788 
789 /*
790 static double interp_bicubic(const double *p, int p_stride, double x,
791                              double y) {
792   double q[4];
793   q[0] = interp_cubic(p, x);
794   q[1] = interp_cubic(p + p_stride, x);
795   q[2] = interp_cubic(p + 2 * p_stride, x);
796   q[3] = interp_cubic(p + 3 * p_stride, x);
797   return interp_cubic(q, y);
798 }
799 */
800 
801 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
802   0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
803 };
804 
// Returns the distortion-curve category for a normalized SSE value:
// 1 when sse_norm is strictly greater than 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  return sse_norm > 16.0 ? 1 : 0;
}
808 
809 // Models distortion by sse using a logistic function on
810 // l = log2(sse / q^2) as:
811 // dbysse = 16 / (1 + k exp(l + c))
get_dbysse_logistic(double l,double c,double k)812 static double get_dbysse_logistic(double l, double c, double k) {
813   const double A = 16.0;
814   const double dbysse = A / (1 + k * exp(l + c));
815   return dbysse;
816 }
817 
// Linear rate model clamped at zero:
//   rate = max(0, a + b * l)
// l is the model input (described in this file as log2(sse / q^2)).
static double get_rate_clamplinear(double l, double a, double b) {
  const double linear = a + b * l;
  if (linear < 0) return 0;
  return linear;
}
825 
826 static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
827   0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 0, 0, 2, 2, 4, 4
828 };
829 
// Per-category coefficients for the surface-fit rate model. For a row r and
// input xm, rate_surffit_model_params_lookup() forms
//   rpar[0] = r[0] + r[1] * xm
//   rpar[1] = r[2] + r[3] * xm
static const double surffit_rate_params[9][4] = {
  { 638.390212, 2.253108, 166.585650, -3.939401 },
  { 5.256905, 81.997240, -1.321771, 17.694216 },
  { -74.193045, 72.431868, -19.033152, 15.407276 },
  { 416.770113, 14.794188, 167.686830, -6.997756 },
  { 378.511276, 9.558376, 154.658843, -6.635663 },
  { 277.818787, 4.413180, 150.317637, -9.893038 },
  { 142.212132, 11.542038, 94.393964, -5.518517 },
  { 219.100256, 4.007421, 108.932852, -6.981310 },
  { 222.261971, 3.251049, 95.972916, -5.609789 },
};
886 
// Coefficients for the surface-fit distortion model; see
// dist_surffit_model_params_lookup() for how each entry is used.
static const double surffit_dist_params[7] = {
  1.475844, 4.328362, -5.680233, -0.500994,  // used for dpar[0]
  0.554585, 4.839478, -0.695837              // used for dpar[1]
};
890 
// Computes the two parameters of the clamped-linear rate model for a block
// size and input xm. Each parameter is an affine function of xm whose
// coefficients come from the surffit_rate_params row for bsize's category.
// rpar must have room for two doubles.
static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *rpar) {
  const double *const coef =
      surffit_rate_params[bsize_surffit_model_cat_lookup[bsize]];
  rpar[0] = coef[0] + coef[1] * xm;
  rpar[1] = coef[2] + coef[3] * xm;
}
897 
// Computes the two parameters of the logistic distortion model for input xm
// from surffit_dist_params. bsize is accepted for symmetry with the rate
// lookup but is not used. dpar must have room for two doubles.
static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *dpar) {
  (void)bsize;
  const double *const coef = surffit_dist_params;
  // dpar[0]: a sigmoid in xm.
  const double sigmoid_arg = (xm + coef[2]) * coef[3];
  dpar[0] = coef[0] + coef[1] / (1 + exp(sigmoid_arg));
  // dpar[1]: an exponential in xm.
  const double exp_arg = coef[6] * xm;
  dpar[1] = coef[4] + coef[5] * exp(exp_arg);
}
905 
// Surface-fit rate/distortion model.
//
// bsize:       block size; selects the rate coefficients.
// sse_norm:    currently unused.
// xm:          input from which the model parameters are derived.
// yl:          input at which the rate and distortion models are evaluated.
// rate_f:      receives the clamped-linear rate estimate.
// distbysse_f: receives the logistic distortion-by-SSE estimate.
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f) {
  (void)sse_norm;

  double rate_coef[2];
  double dist_coef[2];
  rate_surffit_model_params_lookup(bsize, xm, rate_coef);
  dist_surffit_model_params_lookup(bsize, xm, dist_coef);

  *rate_f = get_rate_clamplinear(yl, rate_coef[0], rate_coef[1]);
  *distbysse_f = get_dbysse_logistic(yl, dist_coef[0], dist_coef[1]);
}
916 
// Rate curves sampled on the 65-point grid used by av1_model_rd_curvfit().
// Rows are indexed by the category from bsize_curvfit_model_cat_lookup;
// columns by grid position.
static const double interp_rgrid_curv[4][65] = {
  // Category 0.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  // Category 1.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  // Category 2.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  // Category 3.
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
979 
// Distortion-by-SSE curves sampled on the 65-point grid used by
// av1_model_rd_curvfit(). Rows are indexed by the category from
// sse_norm_curvfit_model_cat_lookup(), which yields 0 or 1.
// The array is declared with three rows but only two are listed; C
// initialization rules zero-fill the remaining row.
static const double interp_dgrid_curv[3][65] = {
  // Category 0.
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  // Category 1.
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1008 
// Curve-fit rate/distortion model: evaluates the tabulated rate and
// distortion-by-SSE curves at xqr using cubic interpolation.
//
// bsize:       block size; selects the rate curve.
// sse_norm:    normalized SSE; selects the distortion curve.
// xqr:         curve abscissa. It is clamped so that the four samples the
//              interpolation needs always lie inside the 65-point grid.
// rate_f:      receives the interpolated rate.
// distbysse_f: receives the interpolated distortion-by-SSE value.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // The grid covers [-15.5, 16.5] in steps of 0.5.
  const double grid_first = -15.5;
  const double grid_last = 16.5;
  const double grid_step = 0.5;
  const double margin = 1e-6;

  // Keep one full cell free on each side for the outer interpolation taps.
  const double lowest = grid_first + grid_step + margin;
  const double highest = grid_last - grid_step - margin;
  xqr = AOMMAX(xqr, lowest);
  xqr = AOMMIN(xqr, highest);

  // Split the grid coordinate into a cell index and an offset in the cell.
  const double grid_pos = (xqr - grid_first) / grid_step;
  const int xi = (int)floor(grid_pos);
  const double offset = grid_pos - xi;

  assert(xi > 0);

  const int first_tap = xi - 1;
  const int rate_cat = bsize_curvfit_model_cat_lookup[bsize];
  const int dist_cat = sse_norm_curvfit_model_cat_lookup(sse_norm);

  *rate_f = interp_cubic(&interp_rgrid_curv[rate_cat][first_tap], offset);
  *distbysse_f = interp_cubic(&interp_dgrid_curv[dist_cat][first_tap], offset);
}
1032 
// Copies one plane's above and left entropy contexts into caller-provided
// scratch arrays. The number of entries copied for each direction is taken
// from mi_size_wide / mi_size_high for plane_bsize.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const int above_count = mi_size_wide[plane_bsize];
  const int left_count = mi_size_high[plane_bsize];

  memcpy(t_above, pd->above_entropy_context,
         sizeof(ENTROPY_CONTEXT) * above_count);
  memcpy(t_left, pd->left_entropy_context,
         sizeof(ENTROPY_CONTEXT) * left_count);
}
1045 
// Public entry point for fetching a plane's entropy contexts: checks (in
// debug builds) that plane_bsize is a valid block size, then copies the
// plane's above and left contexts into t_above and t_left.
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);

  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1053 
// Evaluates the first two reference motion vectors from the MV stack for
// ref_frame against the source block and records the results in x:
//   - pred_mv0_sad / pred_mv1_sad: SAD of candidate 0 / candidate 1 (only
//     written for candidates that are actually evaluated),
//   - pred_mv_sad: the smallest SAD seen (INT_MAX if none was evaluated),
//   - max_mv_context: the largest |row| or |col| over the candidates, >> 3.
// The second candidate is dropped when it equals the first, and a candidate
// that rounds to the zero full-pel vector is evaluated at most once.
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv stack_mv0 =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv stack_mv1 =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Build the candidate list, skipping an exact duplicate.
  MV pred_mv[MAX_MV_REF_CANDIDATES + 1];
  int num_mv_refs = 0;
  pred_mv[num_mv_refs++] = stack_mv0.as_mv;
  if (stack_mv0.as_int != stack_mv1.as_int) {
    pred_mv[num_mv_refs++] = stack_mv1.as_mv;
  }

  assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));

  const uint8_t *const src_buf = x->plane[0].src.buf;
  int zero_done = 0;
  int lowest_sad = INT_MAX;
  int largest_mv = 0;

  for (int cand = 0; cand < num_mv_refs; ++cand) {
    const MV *const mv = &pred_mv[cand];

    // Round the 1/8-pel components to full-pel positions.
    const int row_fp = (mv->row + 3 + (mv->row >= 0)) >> 3;
    const int col_fp = (mv->col + 3 + (mv->col >= 0)) >> 3;

    const int magnitude = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
    largest_mv = AOMMAX(largest_mv, magnitude);

    // Do not measure the zero full-pel position twice.
    const int is_zero = (row_fp == 0 && col_fp == 0);
    if (is_zero && zero_done) continue;
    zero_done |= is_zero;

    const uint8_t *const ref_buf =
        &ref_y_buffer[ref_y_stride * row_fp + col_fp];
    const int sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_buf, x->plane[0].src.stride, ref_buf, ref_y_stride);

    if (sad < lowest_sad) {
      lowest_sad = sad;
    }

    if (cand == 0) {
      x->pred_mv0_sad[ref_frame] = sad;
    } else if (cand == 1) {
      x->pred_mv1_sad[ref_frame] = sad;
    }
  }

  // Publish the largest candidate magnitude and the best SAD.
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = lowest_sad;
}
1103 
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1104 void av1_setup_pred_block(const MACROBLOCKD *xd,
1105                           struct buf_2d dst[MAX_MB_PLANE],
1106                           const YV12_BUFFER_CONFIG *src,
1107                           const struct scale_factors *scale,
1108                           const struct scale_factors *scale_uv,
1109                           const int num_planes) {
1110   dst[0].buf = src->y_buffer;
1111   dst[0].stride = src->y_stride;
1112   dst[1].buf = src->u_buffer;
1113   dst[2].buf = src->v_buffer;
1114   dst[1].stride = dst[2].stride = src->uv_stride;
1115 
1116   const int mi_row = xd->mi_row;
1117   const int mi_col = xd->mi_col;
1118   for (int i = 0; i < num_planes; ++i) {
1119     setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1120                      i ? src->uv_crop_width : src->y_crop_width,
1121                      i ? src->uv_crop_height : src->y_crop_height,
1122                      dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1123                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1124   }
1125 }
1126 
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1127 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1128                                              int ref_frame) {
1129   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1130   RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1131   const RefCntBuffer *const ref_buf =
1132       get_ref_frame_buf(&cpi->common, ref_frame);
1133   return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1134                                                        : NULL;
1135 }
1136 
// Returns the cost of signalling the current block's interpolation
// filter(s). The cost is 0 unless interp_filter is SWITCHABLE. Otherwise the
// per-direction costs are summed (direction 0 only, or directions 0 and 1
// when dual_filter is non-zero) and scaled by SWITCHABLE_INTERP_RATE_FACTOR.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int total_cost = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter dir_filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    total_cost += x->mode_costs.switchable_interp_costs[ctx][dir_filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * total_cost;
}
1154 
// Resets cpi->rd.thresh_mult to zero and then installs the baseline
// threshold multiplier for every prediction mode listed below. Modes that
// are not listed keep a multiplier of zero.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  // Baseline multipliers, applied in the order listed.
  static const struct {
    THR_MODES mode;
    int mult;
  } baseline_thresh_mult[] = {
    // Single reference: NEAREST.
    { THR_NEARESTMV, 300 },
    { THR_NEARESTL2, 300 },
    { THR_NEARESTL3, 300 },
    { THR_NEARESTB, 300 },
    { THR_NEARESTA2, 300 },
    { THR_NEARESTA, 300 },
    { THR_NEARESTG, 300 },

    // Single reference: NEW.
    { THR_NEWMV, 1000 },
    { THR_NEWL2, 1000 },
    { THR_NEWL3, 1000 },
    { THR_NEWB, 1000 },
    { THR_NEWA2, 1100 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    // Single reference: NEAR.
    { THR_NEARMV, 1000 },
    { THR_NEARL2, 1000 },
    { THR_NEARL3, 1000 },
    { THR_NEARB, 1000 },
    { THR_NEARA2, 1000 },
    { THR_NEARA, 1000 },
    { THR_NEARG, 1000 },

    // Single reference: GLOBAL.
    { THR_GLOBALMV, 2200 },
    { THR_GLOBALL2, 2000 },
    { THR_GLOBALL3, 2000 },
    { THR_GLOBALB, 2400 },
    { THR_GLOBALA2, 2000 },
    { THR_GLOBALG, 2000 },
    { THR_GLOBALA, 2400 },

    // Compound: NEAREST_NEAREST.
    { THR_COMP_NEAREST_NEARESTLA, 1100 },
    { THR_COMP_NEAREST_NEARESTL2A, 1000 },
    { THR_COMP_NEAREST_NEARESTL3A, 800 },
    { THR_COMP_NEAREST_NEARESTGA, 900 },
    { THR_COMP_NEAREST_NEARESTLB, 1000 },
    { THR_COMP_NEAREST_NEARESTL2B, 1000 },
    { THR_COMP_NEAREST_NEARESTL3B, 1000 },
    { THR_COMP_NEAREST_NEARESTGB, 1000 },
    { THR_COMP_NEAREST_NEARESTLA2, 1000 },
    { THR_COMP_NEAREST_NEARESTL2A2, 1000 },
    { THR_COMP_NEAREST_NEARESTL3A2, 1000 },
    { THR_COMP_NEAREST_NEARESTGA2, 1000 },

    { THR_COMP_NEAREST_NEARESTLL2, 2000 },
    { THR_COMP_NEAREST_NEARESTLL3, 2000 },
    { THR_COMP_NEAREST_NEARESTLG, 2000 },
    { THR_COMP_NEAREST_NEARESTBA, 2000 },

    // Compound, LA pair.
    { THR_COMP_NEAR_NEARLA, 1200 },
    { THR_COMP_NEAREST_NEWLA, 1500 },
    { THR_COMP_NEW_NEARESTLA, 1500 },
    { THR_COMP_NEAR_NEWLA, 1530 },
    { THR_COMP_NEW_NEARLA, 1870 },
    { THR_COMP_NEW_NEWLA, 2400 },
    { THR_COMP_GLOBAL_GLOBALLA, 2750 },

    // Compound, L2A pair.
    { THR_COMP_NEAR_NEARL2A, 1200 },
    { THR_COMP_NEAREST_NEWL2A, 1500 },
    { THR_COMP_NEW_NEARESTL2A, 1500 },
    { THR_COMP_NEAR_NEWL2A, 1870 },
    { THR_COMP_NEW_NEARL2A, 1700 },
    { THR_COMP_NEW_NEWL2A, 1800 },
    { THR_COMP_GLOBAL_GLOBALL2A, 2500 },

    // Compound, L3A pair.
    { THR_COMP_NEAR_NEARL3A, 1200 },
    { THR_COMP_NEAREST_NEWL3A, 1500 },
    { THR_COMP_NEW_NEARESTL3A, 1500 },
    { THR_COMP_NEAR_NEWL3A, 1700 },
    { THR_COMP_NEW_NEARL3A, 1700 },
    { THR_COMP_NEW_NEWL3A, 2000 },
    { THR_COMP_GLOBAL_GLOBALL3A, 3000 },

    // Compound, GA pair.
    { THR_COMP_NEAR_NEARGA, 1320 },
    { THR_COMP_NEAREST_NEWGA, 1500 },
    { THR_COMP_NEW_NEARESTGA, 1500 },
    { THR_COMP_NEAR_NEWGA, 2040 },
    { THR_COMP_NEW_NEARGA, 1700 },
    { THR_COMP_NEW_NEWGA, 2000 },
    { THR_COMP_GLOBAL_GLOBALGA, 2250 },

    // Compound, LB pair.
    { THR_COMP_NEAR_NEARLB, 1200 },
    { THR_COMP_NEAREST_NEWLB, 1500 },
    { THR_COMP_NEW_NEARESTLB, 1500 },
    { THR_COMP_NEAR_NEWLB, 1360 },
    { THR_COMP_NEW_NEARLB, 1700 },
    { THR_COMP_NEW_NEWLB, 2400 },
    { THR_COMP_GLOBAL_GLOBALLB, 2250 },

    // Compound, L2B pair.
    { THR_COMP_NEAR_NEARL2B, 1200 },
    { THR_COMP_NEAREST_NEWL2B, 1500 },
    { THR_COMP_NEW_NEARESTL2B, 1500 },
    { THR_COMP_NEAR_NEWL2B, 1700 },
    { THR_COMP_NEW_NEARL2B, 1700 },
    { THR_COMP_NEW_NEWL2B, 2000 },
    { THR_COMP_GLOBAL_GLOBALL2B, 2500 },

    // Compound, L3B pair.
    { THR_COMP_NEAR_NEARL3B, 1200 },
    { THR_COMP_NEAREST_NEWL3B, 1500 },
    { THR_COMP_NEW_NEARESTL3B, 1500 },
    { THR_COMP_NEAR_NEWL3B, 1870 },
    { THR_COMP_NEW_NEARL3B, 1700 },
    { THR_COMP_NEW_NEWL3B, 2000 },
    { THR_COMP_GLOBAL_GLOBALL3B, 2500 },

    // Compound, GB pair.
    { THR_COMP_NEAR_NEARGB, 1200 },
    { THR_COMP_NEAREST_NEWGB, 1500 },
    { THR_COMP_NEW_NEARESTGB, 1500 },
    { THR_COMP_NEAR_NEWGB, 1700 },
    { THR_COMP_NEW_NEARGB, 1700 },
    { THR_COMP_NEW_NEWGB, 2000 },
    { THR_COMP_GLOBAL_GLOBALGB, 2500 },

    // Compound, LA2 pair.
    { THR_COMP_NEAR_NEARLA2, 1200 },
    { THR_COMP_NEAREST_NEWLA2, 1800 },
    { THR_COMP_NEW_NEARESTLA2, 1500 },
    { THR_COMP_NEAR_NEWLA2, 1700 },
    { THR_COMP_NEW_NEARLA2, 1700 },
    { THR_COMP_NEW_NEWLA2, 2000 },
    { THR_COMP_GLOBAL_GLOBALLA2, 2500 },

    // Compound, L2A2 pair.
    { THR_COMP_NEAR_NEARL2A2, 1200 },
    { THR_COMP_NEAREST_NEWL2A2, 1500 },
    { THR_COMP_NEW_NEARESTL2A2, 1500 },
    { THR_COMP_NEAR_NEWL2A2, 1700 },
    { THR_COMP_NEW_NEARL2A2, 1700 },
    { THR_COMP_NEW_NEWL2A2, 2000 },
    { THR_COMP_GLOBAL_GLOBALL2A2, 2500 },

    // Compound, L3A2 pair.
    { THR_COMP_NEAR_NEARL3A2, 1440 },
    { THR_COMP_NEAREST_NEWL3A2, 1500 },
    { THR_COMP_NEW_NEARESTL3A2, 1500 },
    { THR_COMP_NEAR_NEWL3A2, 1700 },
    { THR_COMP_NEW_NEARL3A2, 1700 },
    { THR_COMP_NEW_NEWL3A2, 2000 },
    { THR_COMP_GLOBAL_GLOBALL3A2, 2500 },

    // Compound, GA2 pair.
    { THR_COMP_NEAR_NEARGA2, 1200 },
    { THR_COMP_NEAREST_NEWGA2, 1500 },
    { THR_COMP_NEW_NEARESTGA2, 1500 },
    { THR_COMP_NEAR_NEWGA2, 1700 },
    { THR_COMP_NEW_NEARGA2, 1700 },
    { THR_COMP_NEW_NEWGA2, 2000 },
    { THR_COMP_GLOBAL_GLOBALGA2, 2750 },

    // Compound, LL2 pair.
    { THR_COMP_NEAR_NEARLL2, 1600 },
    { THR_COMP_NEAREST_NEWLL2, 2000 },
    { THR_COMP_NEW_NEARESTLL2, 2000 },
    { THR_COMP_NEAR_NEWLL2, 2640 },
    { THR_COMP_NEW_NEARLL2, 2200 },
    { THR_COMP_NEW_NEWLL2, 2400 },
    { THR_COMP_GLOBAL_GLOBALLL2, 3200 },

    // Compound, LL3 pair.
    { THR_COMP_NEAR_NEARLL3, 1600 },
    { THR_COMP_NEAREST_NEWLL3, 2000 },
    { THR_COMP_NEW_NEARESTLL3, 1800 },
    { THR_COMP_NEAR_NEWLL3, 2200 },
    { THR_COMP_NEW_NEARLL3, 2200 },
    { THR_COMP_NEW_NEWLL3, 2400 },
    { THR_COMP_GLOBAL_GLOBALLL3, 3200 },

    // Compound, LG pair.
    { THR_COMP_NEAR_NEARLG, 1760 },
    { THR_COMP_NEAREST_NEWLG, 2400 },
    { THR_COMP_NEW_NEARESTLG, 2000 },
    { THR_COMP_NEAR_NEWLG, 1760 },
    { THR_COMP_NEW_NEARLG, 2640 },
    { THR_COMP_NEW_NEWLG, 2400 },
    { THR_COMP_GLOBAL_GLOBALLG, 3200 },

    // Compound, BA pair.
    { THR_COMP_NEAR_NEARBA, 1600 },
    { THR_COMP_NEAREST_NEWBA, 2000 },
    { THR_COMP_NEW_NEARESTBA, 2000 },
    { THR_COMP_NEAR_NEWBA, 2200 },
    { THR_COMP_NEW_NEARBA, 1980 },
    { THR_COMP_NEW_NEWBA, 2640 },
    { THR_COMP_GLOBAL_GLOBALBA, 3200 },

    // Intra modes.
    { THR_DC, 1000 },
    { THR_PAETH, 1000 },
    { THR_SMOOTH, 2200 },
    { THR_SMOOTH_V, 2000 },
    { THR_SMOOTH_H, 2000 },
    { THR_H_PRED, 2000 },
    { THR_V_PRED, 1800 },
    { THR_D135_PRED, 2500 },
    { THR_D203_PRED, 2000 },
    { THR_D157_PRED, 2500 },
    { THR_D67_PRED, 2000 },
    { THR_D113_PRED, 2500 },
    { THR_D45_PRED, 2500 },
  };
  const int num_entries =
      (int)(sizeof(baseline_thresh_mult) / sizeof(baseline_thresh_mult[0]));

  RD_OPT *const rd = &cpi->rd;

  // Start from all-zero so unlisted modes have a defined value.
  av1_zero(rd->thresh_mult);

  for (int i = 0; i < num_entries; ++i) {
    rd->thresh_mult[baseline_thresh_mult[i].mode] =
        baseline_thresh_mult[i].mult;
  }
}
1353 
update_thr_fact(int (* factor_buf)[MAX_MODES],THR_MODES best_mode_index,THR_MODES mode_start,THR_MODES mode_end,BLOCK_SIZE min_size,BLOCK_SIZE max_size,int max_rd_thresh_factor)1354 static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
1355                                    THR_MODES best_mode_index,
1356                                    THR_MODES mode_start, THR_MODES mode_end,
1357                                    BLOCK_SIZE min_size, BLOCK_SIZE max_size,
1358                                    int max_rd_thresh_factor) {
1359   for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
1360     for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
1361       int *const fact = &factor_buf[bs][mode];
1362       if (mode == best_mode_index) {
1363         *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
1364       } else {
1365         *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
1366       }
1367     }
1368   }
1369 }
1370 
// Updates the adaptive RD threshold factors after a mode decision for a
// block of size bsize, for both the inter range [inter_mode_start,
// inter_mode_end) and the intra range [intra_mode_start, intra_mode_end).
//
// Block sizes numbered above the superblock size (the 1:4 / 4:1 shapes) only
// update their own entry. Other sizes update a window of up to two sizes on
// either side, limited to [BLOCK_4X4, superblock size].
// use_adaptive_rd_thresh must be positive; it scales the cap on the factors.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int factor_cap = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  // Default: handles block sizes with 1:4 and 4:1 aspect ratios.
  // TODO(any): Experiment with threshold update for parent/child blocks
  BLOCK_SIZE first_size = bsize;
  BLOCK_SIZE last_size = bsize;
  if (!(bsize > cm->seq_params->sb_size)) {
    first_size = AOMMAX(bsize - 2, BLOCK_4X4);
    last_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  first_size, last_size, factor_cap);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  first_size, last_size, factor_cap);
}
1396 
// Returns the intra cost penalty for the given quantizer settings. The
// penalty is proportional to the DC quantizer value q, and the multiplier
// drops by a factor of four for each step up in bit depth:
// 20 * q (8-bit), 5 * q (10-bit), round(5 * q / 4) (12-bit).
// An unsupported bit depth asserts in debug builds and returns -1.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int dc_q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);

  if (bit_depth == AOM_BITS_8) return 20 * dc_q;
  if (bit_depth == AOM_BITS_10) return 5 * dc_q;
  if (bit_depth == AOM_BITS_12) return ROUND_POWER_OF_TWO(5 * dc_q, 2);

  assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  return -1;
}
1409