1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdio.h>
15
16 #include "config/av1_rtcd.h"
17
18 #include "aom_dsp/aom_dsp_common.h"
19 #include "aom_mem/aom_mem.h"
20 #include "aom_ports/bitops.h"
21 #include "aom_ports/mem.h"
22
23 #include "av1/common/common.h"
24 #include "av1/common/entropy.h"
25 #include "av1/common/entropymode.h"
26 #include "av1/common/mvref_common.h"
27 #include "av1/common/pred_common.h"
28 #include "av1/common/quant_common.h"
29 #include "av1/common/reconinter.h"
30 #include "av1/common/reconintra.h"
31 #include "av1/common/seg_common.h"
32
33 #include "av1/encoder/av1_quantize.h"
34 #include "av1/encoder/cost.h"
35 #include "av1/encoder/encodemb.h"
36 #include "av1/encoder/encodemv.h"
37 #include "av1/encoder/encoder.h"
38 #include "av1/encoder/encodetxb.h"
39 #include "av1/encoder/mcomp.h"
40 #include "av1/encoder/ratectrl.h"
41 #include "av1/encoder/rd.h"
42 #include "av1/encoder/tokenize.h"
43
44 #define RD_THRESH_POW 1.25
45
46 // The baseline rd thresholds for breaking out of the rd loop for
47 // certain modes are assumed to be based on 8x8 blocks.
48 // This table is used to correct for block size.
49 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
50 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = {
51 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16
52 };
53
54 static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA]
55 [EXT_TX_SIZES] = {
56 { 1, 1, 1, 1 }, // unused
57 { 1, 1, 0, 0 },
58 { 0, 0, 1, 0 },
59 };
60
61 static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER]
62 [EXT_TX_SIZES] = {
63 { 1, 1, 1, 1 }, // unused
64 { 1, 1, 0, 0 },
65 { 0, 0, 1, 0 },
66 { 0, 1, 1, 1 },
67 };
68
69 static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA,
70 EXT_TX_SETS_INTER)] = {
71 {
72 // Intra
73 EXT_TX_SET_DCTONLY,
74 EXT_TX_SET_DTT4_IDTX_1DDCT,
75 EXT_TX_SET_DTT4_IDTX,
76 },
77 {
78 // Inter
79 EXT_TX_SET_DCTONLY,
80 EXT_TX_SET_ALL16,
81 EXT_TX_SET_DTT9_IDTX_1DDCT,
82 EXT_TX_SET_DCT_IDTX,
83 },
84 };
85
// Converts the mode-related CDFs held in 'fc' into token costs stored in
// 'mode_costs' by calling av1_cost_tokens_from_cdf() once per table.
//
// - Skip-mode costs are refreshed only when the frame's skip_mode_flag is set.
// - Filter-intra and wedge-index costs are refreshed only for block sizes
//   accepted by av1_filter_intra_allowed_bsize() / av1_is_wedge_used().
// - Transform-type costs are refreshed only for the (set, tx size) pairs
//   enabled in use_inter_ext_tx_for_txsize / use_intra_ext_tx_for_txsize.
// - Everything from the compound/reference costs onwards is refreshed only
//   when frame_is_intra_only(cm) is false.
void av1_fill_mode_rates(AV1_COMMON *const cm, ModeCosts *mode_costs,
                         FRAME_CONTEXT *fc) {
  for (int ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->partition_cost[ctx],
                             fc->partition_cdf[ctx], NULL);
  }

  if (cm->current_frame.skip_mode_info.skip_mode_flag) {
    for (int ctx = 0; ctx < SKIP_MODE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->skip_mode_cost[ctx],
                               fc->skip_mode_cdfs[ctx], NULL);
    }
  }

  for (int ctx = 0; ctx < SKIP_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->skip_txfm_cost[ctx],
                             fc->skip_txfm_cdfs[ctx], NULL);
  }

  // Intra luma and chroma prediction modes.
  for (int ctx0 = 0; ctx0 < KF_MODE_CONTEXTS; ++ctx0) {
    for (int ctx1 = 0; ctx1 < KF_MODE_CONTEXTS; ++ctx1) {
      av1_cost_tokens_from_cdf(mode_costs->y_mode_costs[ctx0][ctx1],
                               fc->kf_y_cdf[ctx0][ctx1], NULL);
    }
  }

  for (int grp = 0; grp < BLOCK_SIZE_GROUPS; ++grp) {
    av1_cost_tokens_from_cdf(mode_costs->mbmode_cost[grp], fc->y_mode_cdf[grp],
                             NULL);
  }

  for (int cfl = 0; cfl < CFL_ALLOWED_TYPES; ++cfl) {
    for (int mode = 0; mode < INTRA_MODES; ++mode) {
      av1_cost_tokens_from_cdf(mode_costs->intra_uv_mode_cost[cfl][mode],
                               fc->uv_mode_cdf[cfl][mode], NULL);
    }
  }

  // Filter intra.
  av1_cost_tokens_from_cdf(mode_costs->filter_intra_mode_cost,
                           fc->filter_intra_mode_cdf, NULL);
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (av1_filter_intra_allowed_bsize(cm, bsize)) {
      av1_cost_tokens_from_cdf(mode_costs->filter_intra_cost[bsize],
                               fc->filter_intra_cdfs[bsize], NULL);
    }
  }

  for (int ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->switchable_interp_costs[ctx],
                             fc->switchable_interp_cdf[ctx], NULL);
  }

  // Palette sizes, modes and colour indices.
  for (int bctx = 0; bctx < PALATTE_BSIZE_CTXS; ++bctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_y_size_cost[bctx],
                             fc->palette_y_size_cdf[bctx], NULL);
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_size_cost[bctx],
                             fc->palette_uv_size_cdf[bctx], NULL);
    for (int mctx = 0; mctx < PALETTE_Y_MODE_CONTEXTS; ++mctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_mode_cost[bctx][mctx],
                               fc->palette_y_mode_cdf[bctx][mctx], NULL);
    }
  }

  for (int ctx = 0; ctx < PALETTE_UV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->palette_uv_mode_cost[ctx],
                             fc->palette_uv_mode_cdf[ctx], NULL);
  }

  for (int psize = 0; psize < PALETTE_SIZES; ++psize) {
    for (int cctx = 0; cctx < PALETTE_COLOR_INDEX_CONTEXTS; ++cctx) {
      av1_cost_tokens_from_cdf(mode_costs->palette_y_color_cost[psize][cctx],
                               fc->palette_y_color_index_cdf[psize][cctx],
                               NULL);
      av1_cost_tokens_from_cdf(mode_costs->palette_uv_color_cost[psize][cctx],
                               fc->palette_uv_color_index_cdf[psize][cctx],
                               NULL);
    }
  }

  // CfL: per joint sign, the alpha costs of each plane. A plane whose sign is
  // CFL_SIGN_ZERO gets all-zero alpha costs. The cost of the joint sign itself
  // is folded into the U-plane table only.
  int sign_cost[CFL_JOINT_SIGNS];
  av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL);
  for (int js = 0; js < CFL_JOINT_SIGNS; ++js) {
    int *const u_cost = mode_costs->cfl_cost[js][CFL_PRED_U];
    int *const v_cost = mode_costs->cfl_cost[js][CFL_PRED_V];

    if (CFL_SIGN_U(js) != CFL_SIGN_ZERO) {
      av1_cost_tokens_from_cdf(u_cost, fc->cfl_alpha_cdf[CFL_CONTEXT_U(js)],
                               NULL);
    } else {
      memset(u_cost, 0, CFL_ALPHABET_SIZE * sizeof(*u_cost));
    }

    if (CFL_SIGN_V(js) != CFL_SIGN_ZERO) {
      av1_cost_tokens_from_cdf(v_cost, fc->cfl_alpha_cdf[CFL_CONTEXT_V(js)],
                               NULL);
    } else {
      memset(v_cost, 0, CFL_ALPHABET_SIZE * sizeof(*v_cost));
    }

    for (int alpha = 0; alpha < CFL_ALPHABET_SIZE; ++alpha) {
      u_cost[alpha] += sign_cost[js];
    }
  }

  // Transform size and transform partitioning.
  for (int cat = 0; cat < MAX_TX_CATS; ++cat) {
    for (int ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      av1_cost_tokens_from_cdf(mode_costs->tx_size_cost[cat][ctx],
                               fc->tx_size_cdf[cat][ctx], NULL);
    }
  }

  for (int ctx = 0; ctx < TXFM_PARTITION_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->txfm_partition_cost[ctx],
                             fc->txfm_partition_cdf[ctx], NULL);
  }

  // Transform types; set 0 is skipped for both inter and intra.
  for (int txs = TX_4X4; txs < EXT_TX_SIZES; ++txs) {
    for (int set = 1; set < EXT_TX_SETS_INTER; ++set) {
      if (!use_inter_ext_tx_for_txsize[set][txs]) continue;
      av1_cost_tokens_from_cdf(
          mode_costs->inter_tx_type_costs[set][txs],
          fc->inter_ext_tx_cdf[set][txs],
          av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][set]]);
    }
    for (int set = 1; set < EXT_TX_SETS_INTRA; ++set) {
      if (!use_intra_ext_tx_for_txsize[set][txs]) continue;
      for (int mode = 0; mode < INTRA_MODES; ++mode) {
        av1_cost_tokens_from_cdf(
            mode_costs->intra_tx_type_costs[set][txs][mode],
            fc->intra_ext_tx_cdf[set][txs][mode],
            av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][set]]);
      }
    }
  }

  for (int dir = 0; dir < DIRECTIONAL_MODES; ++dir) {
    av1_cost_tokens_from_cdf(mode_costs->angle_delta_cost[dir],
                             fc->angle_delta_cdf[dir], NULL);
  }
  av1_cost_tokens_from_cdf(mode_costs->intrabc_cost, fc->intrabc_cdf, NULL);

  // The remaining tables are not refreshed on intra-only frames.
  if (frame_is_intra_only(cm)) return;

  // Reference frame signalling.
  for (int ctx = 0; ctx < COMP_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_inter_cost[ctx],
                             fc->comp_inter_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < SINGLE_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->single_ref_cost[ctx][ref],
                               fc->single_ref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < COMP_REF_TYPE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_ref_type_cost[ctx],
                             fc->comp_ref_type_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < UNI_COMP_REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < UNIDIR_COMP_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->uni_comp_ref_cost[ctx][ref],
                               fc->uni_comp_ref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < FWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_ref_cost[ctx][ref],
                               fc->comp_ref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < REF_CONTEXTS; ++ctx) {
    for (int ref = 0; ref < BWD_REFS - 1; ++ref) {
      av1_cost_tokens_from_cdf(mode_costs->comp_bwdref_cost[ctx][ref],
                               fc->comp_bwdref_cdf[ctx][ref], NULL);
    }
  }

  for (int ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->intra_inter_cost[ctx],
                             fc->intra_inter_cdf[ctx], NULL);
  }

  // Inter prediction modes.
  for (int ctx = 0; ctx < NEWMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->newmv_mode_cost[ctx],
                             fc->newmv_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < GLOBALMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->zeromv_mode_cost[ctx],
                             fc->zeromv_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < REFMV_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->refmv_mode_cost[ctx],
                             fc->refmv_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < DRL_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->drl_mode_cost0[ctx], fc->drl_cdf[ctx],
                             NULL);
  }

  for (int ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->inter_compound_mode_cost[ctx],
                             fc->inter_compound_mode_cdf[ctx], NULL);
  }

  // Compound type, wedge and inter-intra.
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->compound_type_cost[bsize],
                             fc->compound_type_cdf[bsize], NULL);
  }

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (av1_is_wedge_used(bsize)) {
      av1_cost_tokens_from_cdf(mode_costs->wedge_idx_cost[bsize],
                               fc->wedge_idx_cdf[bsize], NULL);
    }
  }

  for (int grp = 0; grp < BLOCK_SIZE_GROUPS; ++grp) {
    av1_cost_tokens_from_cdf(mode_costs->interintra_cost[grp],
                             fc->interintra_cdf[grp], NULL);
    av1_cost_tokens_from_cdf(mode_costs->interintra_mode_cost[grp],
                             fc->interintra_mode_cdf[grp], NULL);
  }

  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->wedge_interintra_cost[bsize],
                             fc->wedge_interintra_cdf[bsize], NULL);
  }

  // Motion mode tables are filled from BLOCK_8X8 upwards only.
  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost[bsize],
                             fc->motion_mode_cdf[bsize], NULL);
  }

  for (int bsize = BLOCK_8X8; bsize < BLOCK_SIZES_ALL; ++bsize) {
    av1_cost_tokens_from_cdf(mode_costs->motion_mode_cost1[bsize],
                             fc->obmc_cdf[bsize], NULL);
  }

  for (int ctx = 0; ctx < COMP_INDEX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_idx_cost[ctx],
                             fc->compound_index_cdf[ctx], NULL);
  }

  for (int ctx = 0; ctx < COMP_GROUP_IDX_CONTEXTS; ++ctx) {
    av1_cost_tokens_from_cdf(mode_costs->comp_group_idx_cost[ctx],
                             fc->comp_group_idx_cdf[ctx], NULL);
  }
}
316
// Converts the three loop-restoration CDFs in 'fc' (switchable, wiener and
// sgrproj) into token costs stored in 'mode_costs'.
void av1_fill_lr_rates(ModeCosts *mode_costs, FRAME_CONTEXT *fc) {
  // Switchable restoration type.
  av1_cost_tokens_from_cdf(mode_costs->switchable_restore_cost,
                           fc->switchable_restore_cdf, NULL);

  // Wiener filter.
  av1_cost_tokens_from_cdf(mode_costs->wiener_restore_cost,
                           fc->wiener_restore_cdf, NULL);

  // Self-guided projection filter.
  av1_cost_tokens_from_cdf(mode_costs->sgrproj_restore_cost,
                           fc->sgrproj_restore_cdf, NULL);
}
325
326 // Values are now correlated to quantizer.
327 static int sad_per_bit_lut_8[QINDEX_RANGE];
328 static int sad_per_bit_lut_10[QINDEX_RANGE];
329 static int sad_per_bit_lut_12[QINDEX_RANGE];
330
// Fills bit16lut[0 .. range - 1] with a SAD-per-bit value for each qindex at
// the given bit depth.
//
// The table is generated from a linear function of the real quantizer value
// (rather than hand-tuned constants) so that the impact of experimental
// changes to the quantizer tables is easier to resolve.
static void init_me_luts_bd(int *bit16lut, int range,
                            aom_bit_depth_t bit_depth) {
  for (int qindex = 0; qindex < range; ++qindex) {
    const double q = av1_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
  }
}
342
av1_init_me_luts(void)343 void av1_init_me_luts(void) {
344 init_me_luts_bd(sad_per_bit_lut_8, QINDEX_RANGE, AOM_BITS_8);
345 init_me_luts_bd(sad_per_bit_lut_10, QINDEX_RANGE, AOM_BITS_10);
346 init_me_luts_bd(sad_per_bit_lut_12, QINDEX_RANGE, AOM_BITS_12);
347 }
348
// ARF boost adjustment used by av1_compute_rd_mult(): indexed by
// min(15, gfu_boost / 100); rdmult is increased by rdmult * factor / 128.
static const int rd_boost_factor[16] = {
  64, 32, 32, 32, 24, 16, 12, 12, 8, 8, 4, 4, 2, 2, 1, 0,
};

// Layer depth adjustment used by av1_compute_rd_mult(): indexed by
// min(layer_depth, 6); rdmult is scaled by factor / 128.
static const int rd_layer_depth_factor[7] = {
  160, 160, 160, 160, 192, 208, 224,
};
355
// Default rd multiplier for inter frames as a linear function of qindex.
// The coefficients are a first pass estimate based on data from a previous
// Vizer run.
static double def_inter_rd_multiplier(int qindex) {
  const double base = 3.2;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
362
// Default rd multiplier for ARF/Golden frames as a linear function of qindex.
// The coefficients are a first pass estimate based on data from a previous
// Vizer run.
static double def_arf_rd_multiplier(int qindex) {
  const double base = 3.25;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
369
// Default rd multiplier for key frames as a linear function of qindex.
// The coefficients are a first pass estimate based on data from a previous
// Vizer run.
static double def_kf_rd_multiplier(int qindex) {
  const double base = 3.3;
  const double per_qindex = 0.0035;
  return base + (per_qindex * (double)qindex);
}
376
// Computes the base rd multiplier for a quantizer index.
//
// The multiplier is the squared DC quantizer step for 'qindex', scaled by a
// frame-type dependent factor (key frame, GF/ARF, or any other update type)
// and then rounded down by 4 or 8 bits for 10- and 12-bit content.
//
// Returns a value >= 1, or -1 (after asserting) for an unsupported bit depth.
int av1_compute_rd_mult_based_on_qindex(aom_bit_depth_t bit_depth,
                                        FRAME_UPDATE_TYPE update_type,
                                        int qindex) {
  const int dc_q = av1_dc_quant_QTX(qindex, 0, bit_depth);

  // Pick the qindex-dependent scale for this kind of frame.
  double q_mult;
  if (update_type == KF_UPDATE) {
    q_mult = def_kf_rd_multiplier(qindex);
  } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
    q_mult = def_arf_rd_multiplier(qindex);
  } else {
    q_mult = def_inter_rd_multiplier(qindex);
  }

  int rdmult = dc_q * dc_q;
  rdmult = (int)((double)rdmult * q_mult);

  // Normalize for bit depth.
  switch (bit_depth) {
    case AOM_BITS_8: break;
    case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
    case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }

  // Never hand back a zero or negative multiplier.
  if (rdmult > 0) return rdmult;
  return 1;
}
403
av1_compute_rd_mult(const AV1_COMP * cpi,int qindex)404 int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
405 const aom_bit_depth_t bit_depth = cpi->common.seq_params->bit_depth;
406 const FRAME_UPDATE_TYPE update_type =
407 cpi->ppi->gf_group.update_type[cpi->gf_frame_index];
408 int64_t rdmult =
409 av1_compute_rd_mult_based_on_qindex(bit_depth, update_type, qindex);
410 if (is_stat_consumption_stage(cpi) && !cpi->oxcf.q_cfg.use_fixed_qp_offsets &&
411 (cpi->common.current_frame.frame_type != KEY_FRAME)) {
412 const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
413 const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
414 const int layer_depth =
415 AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
416
417 // Layer depth adjustment
418 rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
419
420 // ARF boost adjustment
421 rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
422 }
423 return (int)rdmult;
424 }
425
// Returns the qindex offset (relative to 'qindex') whose DC quantizer step
// best brackets q / sqrt(beta), where q is the DC quantizer step at 'qindex'.
//
// If the rounded target equals q, the offset is 0. Otherwise the index is
// walked one step at a time, downwards (to no lower than 0) when the target
// is smaller or upwards (to no higher than MAXQ) when it is larger, stopping
// at the first index whose quantizer step reaches or crosses the target.
// 'beta' must be strictly positive.
int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
  assert(beta > 0.0);
  const int base_q = av1_dc_quant_QTX(qindex, 0, bit_depth);
  const int target_q = (int)rint(base_q / sqrt(beta));
  if (target_q == base_q) return 0;

  int idx = qindex;
  if (target_q < base_q) {
    // Search towards smaller quantizers.
    while (idx > 0) {
      --idx;
      if (av1_dc_quant_QTX(idx, 0, bit_depth) <= target_q) break;
    }
  } else {
    // Search towards larger quantizers.
    while (idx < MAXQ) {
      ++idx;
      if (av1_dc_quant_QTX(idx, 0, bit_depth) >= target_q) break;
    }
  }
  return idx - qindex;
}
453
av1_adjust_q_from_delta_q_res(int delta_q_res,int prev_qindex,int curr_qindex)454 int av1_adjust_q_from_delta_q_res(int delta_q_res, int prev_qindex,
455 int curr_qindex) {
456 curr_qindex = clamp(curr_qindex, delta_q_res, 256 - delta_q_res);
457 const int sign_deltaq_index = curr_qindex - prev_qindex >= 0 ? 1 : -1;
458 const int deltaq_deadzone = delta_q_res / 4;
459 const int qmask = ~(delta_q_res - 1);
460 int abs_deltaq_index = abs(curr_qindex - prev_qindex);
461 abs_deltaq_index = (abs_deltaq_index + deltaq_deadzone) & qmask;
462 int adjust_qindex = prev_qindex + sign_deltaq_index * abs_deltaq_index;
463 adjust_qindex = AOMMAX(adjust_qindex, MINQ + 1);
464 return adjust_qindex;
465 }
466
av1_get_adaptive_rdmult(const AV1_COMP * cpi,double beta)467 int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
468 assert(beta > 0.0);
469 const AV1_COMMON *cm = &cpi->common;
470 int q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
471 cm->seq_params->bit_depth);
472
473 return (int)(av1_compute_rd_mult(cpi, q) / beta);
474 }
475
// Returns the rd threshold factor for 'qindex' at the given bit depth.
//
// The DC quantizer step is normalized by 4, 16 or 64 for 8-, 10- and 12-bit
// content respectively, raised to RD_THRESH_POW and scaled by 5.12; the
// result is never smaller than 8. Returns -1 (after asserting) for an
// unsupported bit depth.
static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
  double bit_depth_scale;
  switch (bit_depth) {
    case AOM_BITS_8: bit_depth_scale = 4.0; break;
    case AOM_BITS_10: bit_depth_scale = 16.0; break;
    case AOM_BITS_12: bit_depth_scale = 64.0; break;
    default:
      assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
      return -1;
  }
  const double q = av1_dc_quant_QTX(qindex, 0, bit_depth) / bit_depth_scale;
  // TODO(debargha): Adjust the function below.
  return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
493
av1_set_sad_per_bit(const AV1_COMP * cpi,int * sadperbit,int qindex)494 void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
495 switch (cpi->common.seq_params->bit_depth) {
496 case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
497 case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
498 case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
499 default:
500 assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
501 }
502 }
503
// Fills rd->threshes[segment][block size][mode] from rd->thresh_mult[mode].
//
// For every segment the effective qindex (segment qindex plus y_dc_delta_q,
// clamped to [0, MAXQ]) gives a threshold factor, which is scaled per block
// size by rd_thresh_block_size_factor (stored x4, hence the division by 4).
// Multipliers too large to scale without overflowing int saturate to INT_MAX.
static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
  for (int seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int qindex = clamp(
        av1_get_qindex(&cm->seg, seg, cm->quant_params.base_qindex) +
            cm->quant_params.y_dc_delta_q,
        0, MAXQ);
    const int q_factor =
        compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);

    for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bsize];
      const int mult_limit = INT_MAX / scale;

      for (int mode = 0; mode < MAX_MODES; ++mode) {
        if (rd->thresh_mult[mode] < mult_limit) {
          rd->threshes[seg][bsize][mode] = rd->thresh_mult[mode] * scale / 4;
        } else {
          rd->threshes[seg][bsize][mode] = INT_MAX;
        }
      }
    }
  }
}
527
// Converts the coefficient-coding CDFs in 'fc' into token costs stored in
// 'coeff_costs', for min(num_planes, PLANE_TYPES) plane types.
//
// Two groups of tables are produced:
//  - eob_costs[eob_multi_size][plane]: end-of-block flag costs for each of
//    the seven eob_flag_cdf16 .. eob_flag_cdf1024 CDF families.
//  - coeff_costs[tx_size][plane]: txb-skip, base level, eob-extra, dc-sign
//    and base-range (lps) costs.
void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
                          const int num_planes) {
  const int nplanes = AOMMIN(num_planes, PLANE_TYPES);

  // End-of-block flag costs.
  for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_EOB_COST *const eob_out =
          &coeff_costs->eob_costs[eob_multi_size][plane];

      for (int ctx = 0; ctx < 2; ++ctx) {
        aom_cdf_prob *cdf;
        switch (eob_multi_size) {
          case 0: cdf = fc->eob_flag_cdf16[plane][ctx]; break;
          case 1: cdf = fc->eob_flag_cdf32[plane][ctx]; break;
          case 2: cdf = fc->eob_flag_cdf64[plane][ctx]; break;
          case 3: cdf = fc->eob_flag_cdf128[plane][ctx]; break;
          case 4: cdf = fc->eob_flag_cdf256[plane][ctx]; break;
          case 5: cdf = fc->eob_flag_cdf512[plane][ctx]; break;
          case 6:
          default: cdf = fc->eob_flag_cdf1024[plane][ctx]; break;
        }
        av1_cost_tokens_from_cdf(eob_out->eob_cost[ctx], cdf, NULL);
      }
    }
  }

  // Per transform size / plane coefficient costs.
  for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) {
    for (int plane = 0; plane < nplanes; ++plane) {
      LV_MAP_COEFF_COST *const out = &coeff_costs->coeff_costs[tx_size][plane];

      for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->txb_skip_cost[ctx],
                                 fc->txb_skip_cdf[tx_size][ctx], NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) {
        av1_cost_tokens_from_cdf(out->base_eob_cost[ctx],
                                 fc->coeff_base_eob_cdf[tx_size][plane][ctx],
                                 NULL);
      }

      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->base_cost[ctx],
                                 fc->coeff_base_cdf[tx_size][plane][ctx], NULL);
      }

      // Entries 4..7 of each base_cost row are derived from entries 0..3:
      // [4] is zero, [5] is cost[1] plus one literal bit minus cost[0], and
      // [6] / [7] are the increments cost[2]-cost[1] and cost[3]-cost[2].
      for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) {
        int *const base = out->base_cost[ctx];
        base[4] = 0;
        base[5] = base[1] + av1_cost_literal(1) - base[0];
        base[6] = base[2] - base[1];
        base[7] = base[3] - base[2];
      }

      for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->eob_extra_cost[ctx],
                                 fc->eob_extra_cdf[tx_size][plane][ctx], NULL);
      }

      for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) {
        av1_cost_tokens_from_cdf(out->dc_sign_cost[ctx],
                                 fc->dc_sign_cdf[plane][ctx], NULL);
      }

      // Cumulative base-range costs: each group of BR_CDF_SIZE - 1 levels
      // costs the accumulated price of all preceding "escape" symbols (the
      // last br_rate entry) plus the symbol for the level within the group.
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int br_rate[BR_CDF_SIZE];
        int escape_total = 0;
        int level;   // read after the outer loop
        int symbol;  // read after the inner loop
        av1_cost_tokens_from_cdf(
            br_rate, fc->coeff_br_cdf[AOMMIN(tx_size, TX_32X32)][plane][ctx],
            NULL);
        for (level = 0; level < COEFF_BASE_RANGE; level += BR_CDF_SIZE - 1) {
          for (symbol = 0; symbol < BR_CDF_SIZE - 1; symbol++) {
            out->lps_cost[ctx][level + symbol] = escape_total + br_rate[symbol];
          }
          // 'symbol' now indexes the last (escape) entry of br_rate.
          escape_total += br_rate[symbol];
        }
        out->lps_cost[ctx][level] = escape_total;
      }

      // Second half of each lps_cost row: first differences of the first
      // half (entry 0 is copied unchanged).
      for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) {
        int *const lps = out->lps_cost[ctx];
        lps[0 + COEFF_BASE_RANGE + 1] = lps[0];
        for (int level = 1; level <= COEFF_BASE_RANGE; ++level) {
          lps[level + COEFF_BASE_RANGE + 1] = lps[level] - lps[level - 1];
        }
      }
    }
  }
}
620
// Builds the motion vector cost tables in 'mv_costs' from 'nmvc'.
//
// The per-component cost pointers are first centred on index MV_MAX of their
// backing arrays. The active cost stack is then chosen and filled:
//  - integer_mv != 0: the regular-precision tables, built with
//    MV_SUBPEL_NONE;
//  - otherwise: the high-precision tables when 'usehp' is non-zero, else the
//    regular ones, built with precision 'usehp'.
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
                       MvCosts *mv_costs) {
  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost[comp] = &mv_costs->nmv_cost_alloc[comp][MV_MAX];
  }
  for (int comp = 0; comp < 2; ++comp) {
    mv_costs->nmv_cost_hp[comp] = &mv_costs->nmv_cost_hp_alloc[comp][MV_MAX];
  }

  if (integer_mv) {
    mv_costs->mv_cost_stack = (int **)&mv_costs->nmv_cost;
    av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                             nmvc, MV_SUBPEL_NONE);
    return;
  }

  mv_costs->mv_cost_stack = usehp ? mv_costs->nmv_cost_hp : mv_costs->nmv_cost;
  av1_build_nmv_cost_table(mv_costs->nmv_joint_cost, mv_costs->mv_cost_stack,
                           nmvc, usehp);
}
638
// Builds the IntraBC displacement vector cost tables in 'dv_costs' from
// 'ndvc'. The per-component cost pointers are centred on index MV_MAX of
// their backing arrays, and the tables are built with MV_SUBPEL_NONE.
void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
  for (int comp = 0; comp < 2; ++comp) {
    dv_costs->dv_costs[comp] = &dv_costs->dv_costs_alloc[comp][MV_MAX];
  }
  av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
                           MV_SUBPEL_NONE);
}
645
// Sets up the frame-level rate-distortion constants and cost tables.
//
// Steps, in order:
//  1. rd->RDMULT from base_qindex + y_dc_delta_q (optionally overridden by an
//     RD command when CONFIG_RD_COMMAND is enabled and pass == 2).
//  2. The error-per-bit value derived from RDMULT.
//  3. The per-segment / per-block-size mode thresholds.
//  4. MV, coefficient and mode cost tables. Each is refreshed when its update
//     frequency is COST_UPD_TILE, when nonrd pick mode is off and the
//     frequency is not COST_UPD_OFF, or unconditionally on intra-only frames
//     and on frames whose frame_number has low three bits equal to 1.
//  5. IntraBC DV costs, when nonrd pick mode is off, IntraBC is allowed and
//     this is not the stat-generation stage.
void av1_initialize_rd_consts(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  RD_OPT *const rd = &cpi->rd;
  MvCosts *mv_costs = x->mv_costs;
  const int nonrd_pick_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
  const CostUpdateFreq upd_freq = cpi->oxcf.cost_upd_freq;
  const int force_fill =
      frame_is_intra_only(cm) || (cm->current_frame.frame_number & 0x07) == 1;
  const int num_planes = av1_num_planes(cm);

  rd->RDMULT = av1_compute_rd_mult(
      cpi, cm->quant_params.base_qindex + cm->quant_params.y_dc_delta_q);
#if CONFIG_RD_COMMAND
  if (cpi->oxcf.pass == 2) {
    const RD_COMMAND *rd_command = &cpi->rd_command;
    if (rd_command->option_ls[rd_command->frame_index] ==
        RD_OPTION_SET_Q_RDMULT) {
      rd->RDMULT = rd_command->rdmult_ls[rd_command->frame_index];
    }
  }
#endif  // CONFIG_RD_COMMAND

  av1_set_error_per_bit(&x->errorperbit, rd->RDMULT);

  set_block_thresholds(cm, rd);

  const int refresh_mv_costs =
      (!nonrd_pick_mode && upd_freq.mv != COST_UPD_OFF) ||
      upd_freq.mv == COST_UPD_TILE || force_fill;
  if (refresh_mv_costs) {
    av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
                      cm->features.allow_high_precision_mv, mv_costs);
  }

  const int refresh_coeff_costs =
      (!nonrd_pick_mode && upd_freq.coeff != COST_UPD_OFF) ||
      upd_freq.coeff == COST_UPD_TILE || force_fill;
  if (refresh_coeff_costs) {
    av1_fill_coeff_costs(&x->coeff_costs, cm->fc, num_planes);
  }

  const int refresh_mode_costs =
      (!nonrd_pick_mode && upd_freq.mode != COST_UPD_OFF) ||
      upd_freq.mode == COST_UPD_TILE || force_fill;
  if (refresh_mode_costs) {
    av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
  }

  if (!nonrd_pick_mode && av1_allow_intrabc(cm) &&
      !is_stat_generation_stage(cpi)) {
    av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
  }
}
691
// Looks up the normalized rate and distortion (both Q10) for a normalized
// squared quantizer step 'xsq_q10' (Q10), linearly interpolating between the
// two surrounding table samples.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142,
    4044,  3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186,
    3133,  3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353,
    2290,  2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651,
    1608,  1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963,
    911,   864,  821,  781,  745,  680,  623,  574,  530,  490,  455,  424,
    395,   345,  304,  269,  239,  213,  190,  171,  154,  126,  104,  87,
    73,    61,   52,   44,   38,   28,   21,   16,   12,   10,   8,    6,
    5,     3,    2,    1,    1,    1,    0,    0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
    0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,
    5,    6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,
    18,   21,   24,   26,   29,   31,   34,   36,   39,   44,   49,   54,
    59,   64,   69,   73,   78,   88,   97,   106,  115,  124,  133,  142,
    151,  167,  184,  200,  215,  231,  245,  260,  274,  301,  327,  351,
    375,  397,  418,  439,  458,  495,  528,  559,  587,  613,  637,  659,
    680,  717,  749,  777,  801,  823,  842,  859,  874,  899,  919,  936,
    949,  960,  969,  977,  983,  994,  1001, 1006, 1010, 1013, 1015, 1017,
    1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
  };
  // x^2 (Q10) at which each entry of the two tables above was sampled.
  static const int xsq_iq_q10[] = {
    0,      4,      8,      12,     16,     20,     24,     28,     32,
    40,     48,     56,     64,     72,     80,     88,     96,     112,
    128,    144,    160,    176,    192,    208,    224,    256,    288,
    320,    352,    384,    416,    448,    480,    544,    608,    672,
    736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
    1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
    3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
    7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
    16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
    36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
    81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
    180192, 196576, 212960, 229344, 245728,
  };

  // Table index: eight samples per power-of-two octave of (x^2 / 4 + 8).
  const int biased = (xsq_q10 >> 2) + 8;
  const int octave = get_msb(biased) - 3;
  const int idx = (octave << 3) + ((biased >> octave) & 0x7);

  // Interpolation weights (Q10): 'w_hi' for entry idx + 1, 'w_lo' for idx.
  const int one_q10 = 1 << 10;
  const int w_hi = ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + octave);
  const int w_lo = one_q10 - w_hi;

  *r_q10 = (rate_tab_q10[idx] * w_lo + rate_tab_q10[idx + 1] * w_hi) >> 10;
  *d_q10 = (dist_tab_q10[idx] * w_lo + dist_tab_q10[idx + 1] * w_hi) >> 10;
}
757
av1_model_rd_from_var_lapndz(int64_t var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)758 void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2,
759 unsigned int qstep, int *rate,
760 int64_t *dist) {
761 // This function models the rate and distortion for a Laplacian
762 // source with given variance when quantized with a uniform quantizer
763 // with given stepsize. The closed form expressions are in:
764 // Hang and Chen, "Source Model for transform video coder and its
765 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
766 // Sys. for Video Tech., April 1997.
767 if (var == 0) {
768 *rate = 0;
769 *dist = 0;
770 } else {
771 int d_q10, r_q10;
772 static const uint32_t MAX_XSQ_Q10 = 245727;
773 const uint64_t xsq_q10_64 =
774 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
775 const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10);
776 model_rd_norm(xsq_q10, &r_q10, &d_q10);
777 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT);
778 *dist = (var * (int64_t)d_q10 + 512) >> 10;
779 }
780 }
781
// Cubic interpolation through four samples p[0..3]. 'x' is the position
// between p[1] (x = 0) and p[2] (x = 1); the result equals p[1] at x = 0 and
// p[2] at x = 1.
static double interp_cubic(const double *p, double x) {
  // Polynomial coefficients of 0.5 * x * (c1 + x * (c2 + x * c3)).
  const double c1 = p[2] - p[0];
  const double c2 = 2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3];
  const double c3 = 3.0 * (p[1] - p[2]) + p[3] - p[0];
  return p[1] + 0.5 * x * (c1 + x * (c2 + x * c3));
}
788
789 /*
790 static double interp_bicubic(const double *p, int p_stride, double x,
791 double y) {
792 double q[4];
793 q[0] = interp_cubic(p, x);
794 q[1] = interp_cubic(p + p_stride, x);
795 q[2] = interp_cubic(p + 2 * p_stride, x);
796 q[3] = interp_cubic(p + 3 * p_stride, x);
797 return interp_cubic(q, y);
798 }
799 */
800
801 static const uint8_t bsize_curvfit_model_cat_lookup[BLOCK_SIZES_ALL] = {
802 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 1, 1, 2, 2, 3, 3
803 };
804
// Curve-fit model category by normalized SSE: 1 when 'sse_norm' is strictly
// greater than 16.0, otherwise 0.
static int sse_norm_curvfit_model_cat_lookup(double sse_norm) {
  if (sse_norm > 16.0) return 1;
  return 0;
}
808
809 // Models distortion by sse using a logistic function on
810 // l = log2(sse / q^2) as:
811 // dbysse = 16 / (1 + k exp(l + c))
get_dbysse_logistic(double l,double c,double k)812 static double get_dbysse_logistic(double l, double c, double k) {
813 const double A = 16.0;
814 const double dbysse = A / (1 + k * exp(l + c));
815 return dbysse;
816 }
817
// Models rate using a clamped linear function on
// l = log2(sse / q^2) as:
// rate = max(0, a + b * l)
static double get_rate_clamplinear(double l, double a, double b) {
  const double linear = a + b * l;
  if (linear < 0) return 0;
  return linear;
}
825
// Indexed by block size. Each entry is the row (0..8) of surffit_rate_params
// that rate_surffit_model_params_lookup() reads for that block size.
static const uint8_t bsize_surffit_model_cat_lookup[BLOCK_SIZES_ALL] = {
  0, 0, 0, 0, 1, 1, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 0, 0, 2, 2, 4, 4
};
829
// Rate-model coefficients for the surface-fit model, one row per category
// from bsize_surffit_model_cat_lookup. For a row r and input xm,
// rate_surffit_model_params_lookup() forms:
//   rpar[0] = r[0] + r[1] * xm   (intercept passed to get_rate_clamplinear)
//   rpar[1] = r[2] + r[3] * xm   (slope passed to get_rate_clamplinear)
static const double surffit_rate_params[9][4] = {
  {
      638.390212,
      2.253108,
      166.585650,
      -3.939401,
  },
  {
      5.256905,
      81.997240,
      -1.321771,
      17.694216,
  },
  {
      -74.193045,
      72.431868,
      -19.033152,
      15.407276,
  },
  {
      416.770113,
      14.794188,
      167.686830,
      -6.997756,
  },
  {
      378.511276,
      9.558376,
      154.658843,
      -6.635663,
  },
  {
      277.818787,
      4.413180,
      150.317637,
      -9.893038,
  },
  {
      142.212132,
      11.542038,
      94.393964,
      -5.518517,
  },
  {
      219.100256,
      4.007421,
      108.932852,
      -6.981310,
  },
  {
      222.261971,
      3.251049,
      95.972916,
      -5.609789,
  },
};
886
// Distortion-model coefficients for the surface-fit model (shared by all
// block sizes). dist_surffit_model_params_lookup() uses them as:
//   dpar[0] = p[0] + p[1] / (1 + exp((xm + p[2]) * p[3]))
//   dpar[1] = p[4] + p[5] * exp(p[6] * xm)
static const double surffit_dist_params[7] = { 1.475844,  4.328362, -5.680233,
                                               -0.500994, 0.554585, 4.839478,
                                               -0.695837 };
890
// Derives the two rate-model parameters for a block size and input xm.
//   bsize: block size; selects a row of surffit_rate_params through
//          bsize_surffit_model_cat_lookup.
//   xm:    model input; each parameter is linear in xm.
//   rpar:  output, two elements. rpar[0] and rpar[1] are later used as the
//          intercept and slope arguments of get_rate_clamplinear().
static void rate_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *rpar) {
  const double *const coef =
      surffit_rate_params[bsize_surffit_model_cat_lookup[bsize]];
  rpar[0] = coef[0] + coef[1] * xm;
  rpar[1] = coef[2] + coef[3] * xm;
}
897
// Derives the two distortion-model parameters for input xm.
//   bsize: unused; the same coefficients apply to every block size.
//   xm:    model input.
//   dpar:  output, two elements. dpar[0] is a logistic function of xm and
//          dpar[1] an exponential function of xm; they are later passed as
//          the c and k arguments of get_dbysse_logistic().
static void dist_surffit_model_params_lookup(BLOCK_SIZE bsize, double xm,
                                             double *dpar) {
  (void)bsize;
  const double *const coef = surffit_dist_params;
  const double logistic_denom = 1 + exp((xm + coef[2]) * coef[3]);
  dpar[0] = coef[0] + coef[1] / logistic_denom;
  dpar[1] = coef[4] + coef[5] * exp(coef[6] * xm);
}
905
// Surface-fit rate/distortion model.
//   bsize:       block size; selects the rate coefficients.
//   sse_norm:    unused.
//   xm:          input from which the model parameters are derived.
//   yl:          input at which the rate and distortion curves are evaluated.
//   rate_f:      output, clamped-linear rate estimate.
//   distbysse_f: output, logistic distortion / sse estimate.
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f) {
  (void)sse_norm;

  double rate_par[2];
  rate_surffit_model_params_lookup(bsize, xm, rate_par);
  double dist_par[2];
  dist_surffit_model_params_lookup(bsize, xm, dist_par);

  const double rate_intercept = rate_par[0];
  const double rate_slope = rate_par[1];
  *rate_f = get_rate_clamplinear(yl, rate_intercept, rate_slope);

  const double logistic_offset = dist_par[0];
  const double logistic_scale = dist_par[1];
  *distbysse_f = get_dbysse_logistic(yl, logistic_offset, logistic_scale);
}
916
// Sampled rate curves for the curve-fit model, read by
// av1_model_rd_curvfit(). One row per category from
// bsize_curvfit_model_cat_lookup (4 rows). Column j holds the sample at
// xqr = -15.5 + 0.5 * j, so the 65 columns span -15.5 .. 16.5.
static const double interp_rgrid_curv[4][65] = {
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    118.257702,  120.210658,  121.434853,  122.100487,
      122.377758,  122.436865,  72.290102,   96.974289,   101.652727,
      126.830141,  140.417377,  157.644879,  184.315291,  215.823873,
      262.300169,  335.919859,  420.624173,  519.185032,  619.854243,
      726.053595,  827.663369,  933.127475,  1037.988755, 1138.839609,
      1233.342933, 1333.508064, 1428.760126, 1533.396364, 1616.952052,
      1744.539319, 1803.413586, 1951.466618, 1994.227838, 2086.031680,
      2148.635443, 2239.068450, 2222.590637, 2338.859809, 2402.929011,
      2418.727875, 2435.342670, 2471.159469, 2523.187446, 2591.183827,
      2674.905840, 2774.110714, 2888.555675, 3017.997952, 3162.194773,
      3320.903365, 3493.880956, 3680.884773, 3881.672045, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    13.087244,   15.919735,   25.930313,   24.412411,
      28.567417,   29.924194,   30.857010,   32.742979,   36.382570,
      39.210386,   42.265690,   47.378572,   57.014850,   82.740067,
      137.346562,  219.968084,  316.781856,  415.643773,  516.706538,
      614.914364,  714.303763,  815.512135,  911.210485,  1008.501528,
      1109.787854, 1213.772279, 1322.922561, 1414.752579, 1510.505641,
      1615.741888, 1697.989032, 1780.123933, 1847.453790, 1913.742309,
      1960.828122, 2047.500168, 2085.454095, 2129.230668, 2158.171824,
      2182.231724, 2217.684864, 2269.589211, 2337.264824, 2420.618694,
      2519.557814, 2633.989178, 2763.819779, 2908.956609, 3069.306660,
      3244.776927, 3435.274401, 3640.706076, 3860.978945, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    4.656893,    5.123633,    5.594132,    6.162376,
      6.918433,    7.768444,    8.739415,    10.105862,   11.477328,
      13.236604,   15.421030,   19.093623,   25.801871,   46.724612,
      98.841054,   181.113466,  272.586364,  359.499769,  445.546343,
      525.944439,  605.188743,  681.793483,  756.668359,  838.486885,
      926.950356,  1015.482542, 1113.353926, 1204.897193, 1288.871992,
      1373.464145, 1455.746628, 1527.796460, 1588.475066, 1658.144771,
      1710.302500, 1807.563351, 1863.197608, 1927.281616, 1964.450872,
      2022.719898, 2100.041145, 2185.205712, 2280.993936, 2387.616216,
      2505.282950, 2634.204540, 2774.591385, 2926.653884, 3090.602436,
      3266.647443, 3454.999303, 3655.868416, 3869.465182, 4096.000000,
  },
  {
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.000000,    0.000000,    0.000000,    0.000000,
      0.000000,    0.337370,    0.391916,    0.468839,    0.566334,
      0.762564,    1.069225,    1.384361,    1.787581,    2.293948,
      3.251909,    4.412991,    8.050068,    11.606073,   27.668092,
      65.227758,   128.463938,  202.097653,  262.715851,  312.464873,
      355.601398,  400.609054,  447.201352,  495.761568,  552.871938,
      619.067625,  691.984883,  773.753288,  860.628503,  946.262808,
      1019.805896, 1106.061360, 1178.422145, 1244.852258, 1302.173987,
      1399.650266, 1548.092912, 1545.928652, 1670.817500, 1694.523823,
      1779.195362, 1882.155494, 1990.662097, 2108.325181, 2235.456119,
      2372.366287, 2519.367059, 2676.769812, 2844.885918, 3024.026754,
      3214.503695, 3416.628115, 3630.711389, 3857.064892, 4096.000000,
  },
};
979
// Sampled distortion / sse curves for the curve-fit model, read by
// av1_model_rd_curvfit(). The row is chosen by
// sse_norm_curvfit_model_cat_lookup(): row 0 for sse_norm <= 16, row 1 for
// sse_norm > 16. Column j holds the sample at xqr = -15.5 + 0.5 * j.
// NOTE(review): the array is declared with 3 rows but only 2 are
// initialized, so the third row is implicitly all zeros; the category lookup
// only yields 0 or 1, so av1_model_rd_curvfit() never selects it.
static const double interp_dgrid_curv[3][65] = {
  {
      16.000000, 15.962891, 15.925174, 15.886888, 15.848074, 15.808770,
      15.769015, 15.728850, 15.688313, 15.647445, 15.606284, 15.564870,
      15.525918, 15.483820, 15.373330, 15.126844, 14.637442, 14.184387,
      13.560070, 12.880717, 12.165995, 11.378144, 10.438769, 9.130790,
      7.487633,  5.688649,  4.267515,  3.196300,  2.434201,  1.834064,
      1.369920,  1.035921,  0.775279,  0.574895,  0.427232,  0.314123,
      0.233236,  0.171440,  0.128188,  0.092762,  0.067569,  0.049324,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  0.000000,
  },
  {
      16.000000, 15.996116, 15.984769, 15.966413, 15.941505, 15.910501,
      15.873856, 15.832026, 15.785466, 15.734633, 15.679981, 15.621967,
      15.560961, 15.460157, 15.288367, 15.052462, 14.466922, 13.921212,
      13.073692, 12.222005, 11.237799, 9.985848,  8.898823,  7.423519,
      5.995325,  4.773152,  3.744032,  2.938217,  2.294526,  1.762412,
      1.327145,  1.020728,  0.765535,  0.570548,  0.425833,  0.313825,
      0.232959,  0.171324,  0.128174,  0.092750,  0.067558,  0.049319,
      0.036330,  0.027008,  0.019853,  0.015539,  0.011093,  0.008733,
      0.007624,  0.008105,  0.005427,  0.004065,  0.003427,  0.002848,
      0.002328,  0.001865,  0.001457,  0.001103,  0.000801,  0.000550,
      0.000348,  0.000193,  0.000085,  0.000021,  -0.000000,
  },
};
1008
// Curve-fit rate/distortion model: cubic interpolation into the sampled
// rate and distortion curves.
//   bsize:       block size; selects the rate curve.
//   sse_norm:    normalized sse; selects the distortion curve.
//   xqr:         curve input; clamped so the four samples needed for the
//                interpolation always lie inside the 65-entry grids.
//   rate_f:      output, interpolated rate.
//   distbysse_f: output, interpolated distortion / sse.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f) {
  // Sampling grid of the curves: xqr = x_start + j * x_step, j = 0..64.
  const double x_start = -15.5;
  const double x_end = 16.5;
  const double x_step = 0.5;
  const double epsilon = 1e-6;

  // Keep xqr strictly inside the second and second-to-last grid points.
  const double xqr_low = x_start + x_step + epsilon;
  const double xqr_high = x_end - x_step - epsilon;
  xqr = AOMMAX(xqr, xqr_low);
  xqr = AOMMIN(xqr, xqr_high);

  // Split the grid position into an integer cell and a fractional offset.
  const double grid_pos = (xqr - x_start) / x_step;
  const int cell = (int)floor(grid_pos);
  const double frac = grid_pos - cell;

  assert(cell > 0);

  // interp_cubic() interpolates between samples 1 and 2 of the window, so
  // the window starts one sample before the cell.
  const int window_start = cell - 1;
  const double *const rate_curve =
      interp_rgrid_curv[bsize_curvfit_model_cat_lookup[bsize]];
  const double *const dist_curve =
      interp_dgrid_curv[sse_norm_curvfit_model_cat_lookup(sse_norm)];

  *rate_f = interp_cubic(rate_curve + window_start, frac);
  *distbysse_f = interp_cubic(dist_curve + window_start, frac);
}
1032
// Copies a plane's above and left entropy contexts into caller buffers.
//   plane_bsize: block size; mi_size_wide / mi_size_high of this size give
//                the number of above / left entries copied.
//   pd:          plane whose above_entropy_context and left_entropy_context
//                are read.
//   t_above:     output buffer for the above contexts.
//   t_left:      output buffer for the left contexts.
static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize,
                                       const struct macroblockd_plane *pd,
                                       ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                                       ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  const int above_count = mi_size_wide[plane_bsize];
  const int left_count = mi_size_high[plane_bsize];

  memcpy(t_above, pd->above_entropy_context,
         sizeof(ENTROPY_CONTEXT) * above_count);
  memcpy(t_left, pd->left_entropy_context,
         sizeof(ENTROPY_CONTEXT) * left_count);
}
1045
// Public entry point for fetching a plane's entropy contexts.
// Asserts that plane_bsize is a valid block size (< BLOCK_SIZES_ALL), then
// copies pd's above and left entropy contexts into t_above and t_left via
// get_entropy_contexts_plane().
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) {
  assert(plane_bsize < BLOCK_SIZES_ALL);
  get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left);
}
1053
// Evaluates the first two reference MV candidates of ref_frame by SAD and
// records the results in x.
//   cpi:          encoder instance; supplies the SAD function for block_size.
//   x:            macroblock state; source of the MV stack and the source
//                 luma block, and destination of the results.
//   ref_y_buffer: reference luma buffer positioned at the current block.
//   ref_y_stride: stride of ref_y_buffer.
//   ref_frame:    reference frame whose candidates are evaluated; also the
//                 index of the result arrays.
//   block_size:   block size selecting the SAD function.
// Writes x->pred_mv0_sad / x->pred_mv1_sad (per-candidate SAD, only for
// candidates that were evaluated), x->pred_mv_sad (smallest SAD, INT_MAX if
// none was evaluated) and x->max_mv_context (largest candidate component
// magnitude, shifted right by 3).
void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME };
  const int_mv first_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 0, &x->mbmi_ext);
  const int_mv second_mv =
      av1_get_ref_mv_from_stack(0, ref_frames, 1, &x->mbmi_ext);

  // Candidate list: the first MV, plus the second one when it differs.
  MV candidates[MAX_MV_REF_CANDIDATES + 1];
  int num_candidates = 0;
  candidates[num_candidates++] = first_mv.as_mv;
  if (first_mv.as_int != second_mv.as_int) {
    candidates[num_candidates++] = second_mv.as_mv;
  }

  assert(num_candidates <=
         (int)(sizeof(candidates) / sizeof(candidates[0])));

  const uint8_t *const src_buf = x->plane[0].src.buf;
  int zero_tested = 0;
  int min_sad = INT_MAX;
  int largest_mv = 0;
  for (int idx = 0; idx < num_candidates; ++idx) {
    const MV *const mv = &candidates[idx];
    // Rounded shift by 3 (presumably 1/8-pel to full-pel units).
    const int row_fp = (mv->row + 3 + (mv->row >= 0)) >> 3;
    const int col_fp = (mv->col + 3 + (mv->col >= 0)) >> 3;
    const int magnitude = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
    largest_mv = AOMMAX(largest_mv, magnitude);

    // The zero full-pel position is evaluated at most once.
    const int is_zero = (row_fp == 0 && col_fp == 0);
    if (is_zero) {
      if (zero_tested) continue;
      zero_tested = 1;
    }

    const uint8_t *const ref_ptr =
        ref_y_buffer + (ref_y_stride * row_fp + col_fp);
    const int sad = cpi->ppi->fn_ptr[block_size].sdf(
        src_buf, x->plane[0].src.stride, ref_ptr, ref_y_stride);
    min_sad = AOMMIN(min_sad, sad);

    // Keep the per-candidate SAD for the first two candidates.
    switch (idx) {
      case 0: x->pred_mv0_sad[ref_frame] = sad; break;
      case 1: x->pred_mv1_sad[ref_frame] = sad; break;
      default: break;
    }
  }

  // Record the largest candidate magnitude and the best SAD found.
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = min_sad;
}
1103
av1_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,const struct scale_factors * scale,const struct scale_factors * scale_uv,const int num_planes)1104 void av1_setup_pred_block(const MACROBLOCKD *xd,
1105 struct buf_2d dst[MAX_MB_PLANE],
1106 const YV12_BUFFER_CONFIG *src,
1107 const struct scale_factors *scale,
1108 const struct scale_factors *scale_uv,
1109 const int num_planes) {
1110 dst[0].buf = src->y_buffer;
1111 dst[0].stride = src->y_stride;
1112 dst[1].buf = src->u_buffer;
1113 dst[2].buf = src->v_buffer;
1114 dst[1].stride = dst[2].stride = src->uv_stride;
1115
1116 const int mi_row = xd->mi_row;
1117 const int mi_col = xd->mi_col;
1118 for (int i = 0; i < num_planes; ++i) {
1119 setup_pred_plane(dst + i, xd->mi[0]->bsize, dst[i].buf,
1120 i ? src->uv_crop_width : src->y_crop_width,
1121 i ? src->uv_crop_height : src->y_crop_height,
1122 dst[i].stride, mi_row, mi_col, i ? scale_uv : scale,
1123 xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
1124 }
1125 }
1126
av1_get_scaled_ref_frame(const AV1_COMP * cpi,int ref_frame)1127 YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi,
1128 int ref_frame) {
1129 assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
1130 RefCntBuffer *const scaled_buf = cpi->scaled_ref_buf[ref_frame - 1];
1131 const RefCntBuffer *const ref_buf =
1132 get_ref_frame_buf(&cpi->common, ref_frame);
1133 return (scaled_buf != ref_buf && scaled_buf != NULL) ? &scaled_buf->buf
1134 : NULL;
1135 }
1136
// Rate cost of signalling the interpolation filter(s) of the current block.
//   x:             macroblock state holding the switchable filter cost table.
//   xd:            block state; supplies the block's filters and the
//                  prediction contexts.
//   interp_filter: filter mode; a cost is only incurred when it is SWITCHABLE.
//   dual_filter:   when nonzero both directions (0 and 1) are costed,
//                  otherwise only direction 0.
// Returns SWITCHABLE_INTERP_RATE_FACTOR times the summed per-direction
// cost, or 0 when interp_filter is not SWITCHABLE.
int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter, int dual_filter) {
  if (interp_filter != SWITCHABLE) return 0;

  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_dirs = dual_filter ? 2 : 1;
  int total_cost = 0;
  for (int dir = 0; dir < num_dirs; ++dir) {
    const int ctx = av1_get_pred_context_switchable_interp(xd, dir);
    const InterpFilter filter =
        av1_extract_interp_filter(mbmi->interp_filters, dir);
    total_cost += x->mode_costs.switchable_interp_costs[ctx][filter];
  }
  return SWITCHABLE_INTERP_RATE_FACTOR * total_cost;
}
1154
// Fills cpi->rd.thresh_mult with the baseline RD threshold multiplier of
// every prediction mode (indexed by THR_* mode). The array is cleared with
// av1_zero() first, so a mode that is not assigned below keeps the value 0.
void av1_set_rd_speed_thresholds(AV1_COMP *cpi) {
  RD_OPT *const rd = &cpi->rd;

  // Set baseline threshold values.
  av1_zero(rd->thresh_mult);

  // Single-reference NEAREST modes.
  rd->thresh_mult[THR_NEARESTMV] = 300;
  rd->thresh_mult[THR_NEARESTL2] = 300;
  rd->thresh_mult[THR_NEARESTL3] = 300;
  rd->thresh_mult[THR_NEARESTB] = 300;
  rd->thresh_mult[THR_NEARESTA2] = 300;
  rd->thresh_mult[THR_NEARESTA] = 300;
  rd->thresh_mult[THR_NEARESTG] = 300;

  // Single-reference NEW modes.
  rd->thresh_mult[THR_NEWMV] = 1000;
  rd->thresh_mult[THR_NEWL2] = 1000;
  rd->thresh_mult[THR_NEWL3] = 1000;
  rd->thresh_mult[THR_NEWB] = 1000;
  rd->thresh_mult[THR_NEWA2] = 1100;
  rd->thresh_mult[THR_NEWA] = 1000;
  rd->thresh_mult[THR_NEWG] = 1000;

  // Single-reference NEAR modes.
  rd->thresh_mult[THR_NEARMV] = 1000;
  rd->thresh_mult[THR_NEARL2] = 1000;
  rd->thresh_mult[THR_NEARL3] = 1000;
  rd->thresh_mult[THR_NEARB] = 1000;
  rd->thresh_mult[THR_NEARA2] = 1000;
  rd->thresh_mult[THR_NEARA] = 1000;
  rd->thresh_mult[THR_NEARG] = 1000;

  // Single-reference GLOBAL modes.
  rd->thresh_mult[THR_GLOBALMV] = 2200;
  rd->thresh_mult[THR_GLOBALL2] = 2000;
  rd->thresh_mult[THR_GLOBALL3] = 2000;
  rd->thresh_mult[THR_GLOBALB] = 2400;
  rd->thresh_mult[THR_GLOBALA2] = 2000;
  rd->thresh_mult[THR_GLOBALG] = 2000;
  rd->thresh_mult[THR_GLOBALA] = 2400;

  // Compound NEAREST_NEAREST modes.
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] = 1100;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] = 800;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] = 900;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] = 1000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] = 1000;

  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] = 2000;

  // Remaining compound modes, one group per reference pair (suffix).
  rd->thresh_mult[THR_COMP_NEAR_NEARLA] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA] = 1530;
  rd->thresh_mult[THR_COMP_NEW_NEARLA] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEWLA] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A] = 1800;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] = 3000;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA] = 1320;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA] = 2040;
  rd->thresh_mult[THR_COMP_NEW_NEARGA] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARLB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLB] = 1360;
  rd->thresh_mult[THR_COMP_NEW_NEARLB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLB] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] = 2250;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3B] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3B] = 1870;
  rd->thresh_mult[THR_COMP_NEW_NEARL3B] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3B] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGB] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGB] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGB] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGB] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGB] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARLA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] = 1800;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARLA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWLA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL2A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL2A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] = 1440;
  rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARL3A2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWL3A2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] = 2500;

  rd->thresh_mult[THR_COMP_NEAR_NEARGA2] = 1200;
  rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] = 1500;
  rd->thresh_mult[THR_COMP_NEAR_NEWGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEARGA2] = 1700;
  rd->thresh_mult[THR_COMP_NEW_NEWGA2] = 2000;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] = 2750;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL2] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL2] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEARLL2] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL2] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLL3] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] = 1800;
  rd->thresh_mult[THR_COMP_NEAR_NEWLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARLL3] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEWLL3] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARLG] = 1760;
  rd->thresh_mult[THR_COMP_NEAREST_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_NEW_NEARESTLG] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWLG] = 1760;
  rd->thresh_mult[THR_COMP_NEW_NEARLG] = 2640;
  rd->thresh_mult[THR_COMP_NEW_NEWLG] = 2400;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] = 3200;

  rd->thresh_mult[THR_COMP_NEAR_NEARBA] = 1600;
  rd->thresh_mult[THR_COMP_NEAREST_NEWBA] = 2000;
  rd->thresh_mult[THR_COMP_NEW_NEARESTBA] = 2000;
  rd->thresh_mult[THR_COMP_NEAR_NEWBA] = 2200;
  rd->thresh_mult[THR_COMP_NEW_NEARBA] = 1980;
  rd->thresh_mult[THR_COMP_NEW_NEWBA] = 2640;
  rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] = 3200;

  // Intra prediction modes.
  rd->thresh_mult[THR_DC] = 1000;
  rd->thresh_mult[THR_PAETH] = 1000;
  rd->thresh_mult[THR_SMOOTH] = 2200;
  rd->thresh_mult[THR_SMOOTH_V] = 2000;
  rd->thresh_mult[THR_SMOOTH_H] = 2000;
  rd->thresh_mult[THR_H_PRED] = 2000;
  rd->thresh_mult[THR_V_PRED] = 1800;
  rd->thresh_mult[THR_D135_PRED] = 2500;
  rd->thresh_mult[THR_D203_PRED] = 2000;
  rd->thresh_mult[THR_D157_PRED] = 2500;
  rd->thresh_mult[THR_D67_PRED] = 2000;
  rd->thresh_mult[THR_D113_PRED] = 2500;
  rd->thresh_mult[THR_D45_PRED] = 2500;
}
1353
update_thr_fact(int (* factor_buf)[MAX_MODES],THR_MODES best_mode_index,THR_MODES mode_start,THR_MODES mode_end,BLOCK_SIZE min_size,BLOCK_SIZE max_size,int max_rd_thresh_factor)1354 static INLINE void update_thr_fact(int (*factor_buf)[MAX_MODES],
1355 THR_MODES best_mode_index,
1356 THR_MODES mode_start, THR_MODES mode_end,
1357 BLOCK_SIZE min_size, BLOCK_SIZE max_size,
1358 int max_rd_thresh_factor) {
1359 for (THR_MODES mode = mode_start; mode < mode_end; ++mode) {
1360 for (BLOCK_SIZE bs = min_size; bs <= max_size; ++bs) {
1361 int *const fact = &factor_buf[bs][mode];
1362 if (mode == best_mode_index) {
1363 *fact -= (*fact >> RD_THRESH_LOG_DEC_FACTOR);
1364 } else {
1365 *fact = AOMMIN(*fact + RD_THRESH_INC, max_rd_thresh_factor);
1366 }
1367 }
1368 }
1369 }
1370
// Updates the adaptive RD threshold factors after a mode decision.
//   cm:                     common state; supplies the superblock size.
//   factor_buf:             factor table indexed as [block size][mode].
//   use_adaptive_rd_thresh: must be > 0; scales RD_THRESH_MAX_FACT to give
//                           the cap on a raised factor.
//   bsize:                  block size that was just coded.
//   best_mode_index:        mode that was selected.
//   inter_mode_start/end:   half-open range of inter modes to update.
//   intra_mode_start/end:   half-open range of intra modes to update.
// A bsize above the superblock size (the 1:4 / 4:1 block sizes) updates only
// its own row of the table; any other bsize updates the rows from bsize - 2
// to bsize + 2, limited to BLOCK_4X4 .. superblock size.
void av1_update_rd_thresh_fact(
    const AV1_COMMON *const cm, int (*factor_buf)[MAX_MODES],
    int use_adaptive_rd_thresh, BLOCK_SIZE bsize, THR_MODES best_mode_index,
    THR_MODES inter_mode_start, THR_MODES inter_mode_end,
    THR_MODES intra_mode_start, THR_MODES intra_mode_end) {
  assert(use_adaptive_rd_thresh > 0);
  const int factor_cap = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;

  // Default: only the row of bsize itself (1:4 and 4:1 aspect ratios).
  // TODO(any): Experiment with threshold update for parent/child blocks
  BLOCK_SIZE first_size = bsize;
  BLOCK_SIZE last_size = bsize;
  if (bsize <= cm->seq_params->sb_size) {
    first_size = AOMMAX(bsize - 2, BLOCK_4X4);
    last_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
  }

  // Inter modes first, then intra modes.
  update_thr_fact(factor_buf, best_mode_index, inter_mode_start, inter_mode_end,
                  first_size, last_size, factor_cap);
  update_thr_fact(factor_buf, best_mode_index, intra_mode_start, intra_mode_end,
                  first_size, last_size, factor_cap);
}
1396
// Computes the intra cost penalty from the DC quantizer.
//   qindex, qdelta: passed to av1_dc_quant_QTX() to obtain the quantizer q.
//   bit_depth:      selects the scaling applied to q.
// Returns 20 * q for 8-bit, 5 * q for 10-bit, and 5 * q divided by 4 with
// rounding for 12-bit. Any other bit depth asserts and returns -1.
int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth) {
  const int q = av1_dc_quant_QTX(qindex, qdelta, bit_depth);

  if (bit_depth == AOM_BITS_8) return 20 * q;
  if (bit_depth == AOM_BITS_10) return 5 * q;
  if (bit_depth == AOM_BITS_12) return ROUND_POWER_OF_TWO(5 * q, 2);

  assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
  return -1;
}
1409