1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <assert.h>
12 #include <math.h>
13 #include <stdint.h>
14
15 #include "vpx_dsp_common.h"
16 //#include "vpx_mem.h"
17 #include "bitops.h"
18 #include "mem.h"
19 //#include "system_state.h"
20
21 #include "vp9_entropy.h"
22 #include "vp9_entropymode.h"
23 #include "EbUtility.h"
24 #include "vp9_pred_common.h"
25 #include "vp9_quant_common.h"
26
27 #include "vp9_cost.h"
28 //#include "vp9_encodemb.h"
29 #include "vp9_encodemv.h"
30 #include "vp9_encoder.h"
31 //#include "vp9_mcomp.h"
32 #include "vp9_ratectrl.h"
33 #include "vp9_rd.h"
34 #include "vp9_tokenize.h"
35
36 #define RD_THRESH_POW 1.25
37
38 // Factor to weigh the rate for switchable interp filters.
39 #define SWITCHABLE_INTERP_RATE_FACTOR 1
40
// Reset an RD cost record to "no candidate yet": worst-possible rate,
// distortion and combined cost, so any real candidate compares better.
void eb_vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
46
// Zero-initialize an RD cost record so rate/distortion can be accumulated
// into it incrementally.
void eb_vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
52
// Fill the encoder's per-mode bit-cost tables from the current frame
// context probabilities: key-frame and inter-frame intra (y/uv) mode costs,
// switchable interpolation filter costs, and tx-size signalling costs.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Key-frame y-mode costs are conditioned on the (above, left) mode pair.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      eb_vp9_cost_tokens(cpi->y_mode_costs[i][j], eb_vp9_kf_y_mode_prob[i][j],
                         eb_vp9_intra_mode_tree);
    }
  }

  eb_vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], eb_vp9_intra_mode_tree);
  // UV mode costs are conditioned on the co-located y mode; key frames use
  // the fixed kf table, inter frames use the adaptive frame context.
  for (i = 0; i < INTRA_MODES; ++i) {
    eb_vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                       eb_vp9_kf_uv_mode_prob[i], eb_vp9_intra_mode_tree);
    eb_vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                       fc->uv_mode_prob[i], eb_vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    eb_vp9_cost_tokens(cpi->switchable_interp_costs[i],
                       fc->switchable_interp_prob[i], eb_vp9_switchable_interp_tree);
  }

  // Cost of signalling tx size k when the maximum allowed tx size is i,
  // accumulated along the unary-coded tx_probs branch for each context j.
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs((TX_SIZE)i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        // When k == i (largest size) the final branch is implicit, hence
        // the (k == i) correction on the loop bound.
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
95
// Build the coefficient-token cost tables, per (tx size, plane type,
// ref type, band, coefficient context), from the compact model probs.
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            // Expand the compact model probabilities to the full coef tree.
            eb_vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            // Slot [0] is costed over the full tree; slot [1] via the
            // skip variant (EOB branch handled differently at the start).
            eb_vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, eb_vp9_coef_tree);
            eb_vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                    eb_vp9_coef_tree);
            // Both variants must agree on the EOB token cost.
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
114
115 // Values are now correlated to quantizer.
116 static int sad_per_bit16lut_8[QINDEX_RANGE];
117 static int sad_per_bit4lut_8[QINDEX_RANGE];
118
119 #if CONFIG_VP9_HIGHBITDEPTH
120 static int sad_per_bit16lut_10[QINDEX_RANGE];
121 static int sad_per_bit4lut_10[QINDEX_RANGE];
122 static int sad_per_bit16lut_12[QINDEX_RANGE];
123 static int sad_per_bit4lut_12[QINDEX_RANGE];
124 #endif
125
// Fill the SAD-per-bit lookup tables for one bit depth using a linear fit
// against the real quantizer value, so the tables automatically track any
// change to the underlying quantizer tables.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int idx;
  for (idx = 0; idx < range; ++idx) {
    const double q_val = eb_vp9_convert_qindex_to_q(idx, bit_depth);
    bit4lut[idx] = (int)(0.063 * q_val + 2.742);
    bit16lut[idx] = (int)(0.0418 * q_val + 2.4107);
  }
}
138
// One-time initialization of the SAD-per-bit lookup tables for every
// supported bit depth (8-bit always; 10/12-bit when high bit depth is on).
void eb_vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
149
150 // Note that the element below for frame type "USE_BUF_FRAME", which indicates
151 // that the show frame flag is set, should not be used as no real frame
152 // is encoded so we should not reach here. However, a dummy value
153 // is inserted here to make sure the data structure has the right number
154 // of values assigned.
155 #if 0
156 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
157 128, 144, 144 };
158 #endif
eb_vp9_compute_rd_mult_based_on_qindex(const VP9_COMP * cpi,int qindex)159 int64_t eb_vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
160 const int64_t q = eb_vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
161 #if CONFIG_VP9_HIGHBITDEPTH
162 int64_t rdmult = 0;
163 switch (cpi->common.bit_depth) {
164 case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
165 case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
166 default:
167 assert(cpi->common.bit_depth == VPX_BITS_12);
168 rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
169 break;
170 }
171 #else
172 int64_t rdmult = 88 * q * q / 24;
173 #endif // CONFIG_VP9_HIGHBITDEPTH
174 return rdmult;
175 }
176
// Frame-level RD multiplier for a quantizer index, clamped to at least 1.
// The libvpx two-pass frame-type/boost scaling is compiled out in this build.
int eb_vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  int64_t rdmult = eb_vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
#if 0
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));

    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
  }
#endif
  if (rdmult < 1) rdmult = 1;
  return (int)rdmult;
}
192
// RD multiplier for the frame's base qindex, scaled down by 'beta'
// (beta > 1 lowers lambda, favouring rate). Clamped to at least 1.
// NOTE(review): the (int) cast happens before ROUND_POWER_OF_TWO, matching
// upstream libvpx; for extreme q/beta combinations the intermediate could
// exceed int range — confirm inputs are bounded as upstream assumes.
int eb_vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t q = eb_vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth);

#if CONFIG_VP9_HIGHBITDEPTH
  int64_t rdmult = 0;
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
    case VPX_BITS_10:
      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
      break;
  }
#else
  int64_t rdmult = (int)((88 * q * q / beta) / 24);
#endif  // CONFIG_VP9_HIGHBITDEPTH
#if 0
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));

    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
  }
#endif
  if (rdmult < 1) rdmult = 1;
  return (int)rdmult;
}
225
// Load the motion-estimation SAD-per-bit constants for the given quantizer
// index from the precomputed LUTs, selecting the table for the current
// bit depth. Tables must have been filled by eb_vp9_init_me_luts().
void eb_vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
249 #if 0
250 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
251 int i, bsize, segment_id;
252
253 for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
254 const int qindex =
255 clamp(eb_vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
256 cm->y_dc_delta_q,
257 0, MAXQ);
258 const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
259
260 for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
261 // Threshold here seems unnecessarily harsh but fine given actual
262 // range of values used for cpi->sf.thresh_mult[].
263 const int t = q * rd_thresh_block_size_factor[bsize];
264 const int thresh_max = INT_MAX / t;
265
266 if (bsize >= BLOCK_8X8) {
267 for (i = 0; i < MAX_MODES; ++i)
268 rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
269 ? rd->thresh_mult[i] * t / 4
270 : INT_MAX;
271 } else {
272 for (i = 0; i < MAX_REFS; ++i)
273 rd->threshes[segment_id][bsize][i] =
274 rd->thresh_mult_sub8x8[i] < thresh_max
275 ? rd->thresh_mult_sub8x8[i] * t / 4
276 : INT_MAX;
277 }
278 }
279 }
280 }
281 #endif
282
#if 1
// Set up all per-frame RD state: RDMULT/RDDIV, error-per-bit, tx-size
// selection flag, partition probabilities, and the token / partition /
// mode / MV cost tables. Several upstream libvpx paths (system-state
// clearing, block thresholds, first-pass handling) are compiled out here.
void eb_vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;
#if 0
  vpx_clear_system_state();
#endif
  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = eb_vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
#if VP9_RD
  // SAD-domain lambda: sqrt of the SSE-domain RDMULT (relative to 128),
  // kept at a minimum of 1.
  rd->rd_mult_sad = (int)MAX(round(sqrtf((float)rd->RDMULT / 128) * 128),1);

#endif
  set_error_per_bit(x, rd->RDMULT);

  // Fixed tx size on inter frames, searched on key frames.
  x->select_tx_size = (/*cpi->sf.tx_size_search_method == USE_LARGESTALL &&*/
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;
#if 0
  set_block_thresholds(cm, rd);
#endif
  set_partition_probs(cm, xd);
#if 0
  if (cpi->oxcf.pass == 1) {
    if (!frame_is_intra_only(cm))
      eb_vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else
#endif
  {
    //if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
    fill_token_costs(x->token_costs, cm->fc->coef_probs);

    //if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
    //    cm->frame_type == KEY_FRAME)
    {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        eb_vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                           eb_vp9_partition_tree);
    }

    //if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
    //    cm->frame_type == KEY_FRAME)
    {
      fill_mode_costs(cpi);

      // MV and inter-mode costs are only needed on non-intra frames.
      if (!frame_is_intra_only(cm)) {
        eb_vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);

        for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
          eb_vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                             cm->fc->inter_mode_probs[i], eb_vp9_inter_mode_tree);
      }
    }
  }
}
#endif
349 // NOTE: The tables below must be of the same size.
350
351 // The functions described below are sampled at the four most significant
352 // bits of x^2 + 8 / 256.
353
354 // Normalized rate:
355 // This table models the rate for a Laplacian source with given variance
356 // when quantized with a uniform quantizer with given stepsize. The
357 // closed form expression is:
358 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
359 // where r = exp(-sqrt(2) * x) and x = qp_step / sqrt(variance),
360 // and H(x) is the binary entropy function.
361 static const int rate_tab_q10[] = {
362 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
363 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
364 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
365 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
366 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963, 911, 864, 821, 781, 745,
367 680, 623, 574, 530, 490, 455, 424, 395, 345, 304, 269, 239, 213,
368 190, 171, 154, 126, 104, 87, 73, 61, 52, 44, 38, 28, 21,
369 16, 12, 10, 8, 6, 5, 3, 2, 1, 1, 1, 0, 0,
370 };
371
372 // Normalized distortion:
373 // This table models the normalized distortion for a Laplacian source
374 // with given variance when quantized with a uniform quantizer
375 // with given stepsize. The closed form expression is:
376 // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
377 // where x = qp_step / sqrt(variance).
378 // Note the actual distortion is Dn * variance.
379 static const int dist_tab_q10[] = {
380 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5,
381 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21,
382 24, 26, 29, 31, 34, 36, 39, 44, 49, 54, 59, 64, 69,
383 73, 78, 88, 97, 106, 115, 124, 133, 142, 151, 167, 184, 200,
384 215, 231, 245, 260, 274, 301, 327, 351, 375, 397, 418, 439, 458,
385 495, 528, 559, 587, 613, 637, 659, 680, 717, 749, 777, 801, 823,
386 842, 859, 874, 899, 919, 936, 949, 960, 969, 977, 983, 994, 1001,
387 1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
388 };
389 static const int xsq_iq_q10[] = {
390 0, 4, 8, 12, 16, 20, 24, 28, 32,
391 40, 48, 56, 64, 72, 80, 88, 96, 112,
392 128, 144, 160, 176, 192, 208, 224, 256, 288,
393 320, 352, 384, 416, 448, 480, 544, 608, 672,
394 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504,
395 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296,
396 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136,
397 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328,
398 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736,
399 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696,
400 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808,
401 180192, 196576, 212960, 229344, 245728,
402 };
403
model_rd_norm(int xsq_q10,int * r_q10,int * d_q10)404 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
405 const int tmp = (xsq_q10 >> 2) + 8;
406 const int k = get_msb(tmp) - 3;
407 const int xq = (k << 3) + ((tmp >> k) & 0x7);
408 const int one_q10 = 1 << 10;
409 const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
410 const int b_q10 = one_q10 - a_q10;
411 *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
412 *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
413 }
414
model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],int r_q10[MAX_MB_PLANE],int d_q10[MAX_MB_PLANE])415 static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
416 int r_q10[MAX_MB_PLANE],
417 int d_q10[MAX_MB_PLANE]) {
418 int i;
419 const int one_q10 = 1 << 10;
420 for (i = 0; i < MAX_MB_PLANE; ++i) {
421 const int tmp = (xsq_q10[i] >> 2) + 8;
422 const int k = get_msb(tmp) - 3;
423 const int xq = (k << 3) + ((tmp >> k) & 0x7);
424 const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
425 const int b_q10 = one_q10 - a_q10;
426 r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
427 d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
428 }
429 }
430
431 static const uint32_t MAX_XSQ_Q10 = 245727;
432
eb_vp9_model_rd_from_var_lapndz(unsigned int var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)433 void eb_vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
434 unsigned int qstep, int *rate,
435 int64_t *dist) {
436 // This function models the rate and distortion for a Laplacian
437 // source with given variance when quantized with a uniform quantizer
438 // with given stepsize. The closed form expressions are in:
439 // Hang and Chen, "Source Model for transform video coder and its
440 // application - Part I: Fundamental Theory", IEEE Trans. Circ.
441 // Sys. for Video Tech., April 1997.
442 if (var == 0) {
443 *rate = 0;
444 *dist = 0;
445 } else {
446 int d_q10, r_q10;
447 const uint64_t xsq_q10_64 =
448 (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
449 const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
450 model_rd_norm(xsq_q10, &r_q10, &d_q10);
451 *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
452 *dist = (var * (int64_t)d_q10 + 512) >> 10;
453 }
454 }
455
456 // Implements a fixed length vector form of eb_vp9_model_rd_from_var_lapndz where
457 // vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
eb_vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],unsigned int n_log2[MAX_MB_PLANE],unsigned int qstep[MAX_MB_PLANE],int64_t * rate_sum,int64_t * dist_sum)458 void eb_vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
459 unsigned int n_log2[MAX_MB_PLANE],
460 unsigned int qstep[MAX_MB_PLANE],
461 int64_t *rate_sum, int64_t *dist_sum) {
462 int i;
463 int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
464 for (i = 0; i < MAX_MB_PLANE; ++i) {
465 const uint64_t xsq_q10_64 =
466 (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
467 var[i];
468 xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
469 }
470 model_rd_norm_vec(xsq_q10, r_q10, d_q10);
471 for (i = 0; i < MAX_MB_PLANE; ++i) {
472 int rate =
473 ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
474 int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
475 *rate_sum += rate;
476 *dist_sum += dist;
477 }
478 }
479
// Gather the above/left entropy contexts for one plane into t_above/t_left,
// collapsing groups of 4x4 contexts to one flag per transform block
// (2/4/8-wide groups for TX_8X8/TX_16X16/TX_32X32).
//
// Fix: the original read the context groups with type-punned pointer casts
// (*(const uint16_t/uint32_t/uint64_t *)&above[i]), which violates C
// strict-aliasing rules and may perform misaligned loads — both undefined
// behavior. The memcpy loads below are byte-for-byte equivalent ("is any
// byte in the group non-zero?") and compile to the same single load on
// mainstream compilers.
void eb_vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                                 const struct macroblockd_plane *pd,
                                 ENTROPY_CONTEXT t_above[16],
                                 ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = eb_vp9_num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = eb_vp9_num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      // One context per 4x4 block: copy straight through.
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
      for (i = 0; i < num_4x4_w; i += 2) {
        uint16_t v;
        assert(i < 16);
        memcpy(&v, &above[i], sizeof(v));
        t_above[i] = !!v;
      }
      for (i = 0; i < num_4x4_h; i += 2) {
        uint16_t v;
        assert(i < 16);
        memcpy(&v, &left[i], sizeof(v));
        t_left[i] = !!v;
      }
      break;
    case TX_16X16:
      for (i = 0; i < num_4x4_w; i += 4) {
        uint32_t v;
        assert(i < 16);
        memcpy(&v, &above[i], sizeof(v));
        t_above[i] = !!v;
      }
      for (i = 0; i < num_4x4_h; i += 4) {
        uint32_t v;
        assert(i < 16);
        memcpy(&v, &left[i], sizeof(v));
        t_left[i] = !!v;
      }
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8) {
        uint64_t v;
        assert(i < 16);
        memcpy(&v, &above[i], sizeof(v));
        t_above[i] = !!v;
      }
      for (i = 0; i < num_4x4_h; i += 8) {
        uint64_t v;
        assert(i < 16);
        memcpy(&v, &left[i], sizeof(v));
        t_left[i] = !!v;
      }
      break;
  }
}
529 #if 0
530 void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
531 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
532 int i;
533 int zero_seen = 0;
534 int best_index = 0;
535 int best_sad = INT_MAX;
536 int this_sad = INT_MAX;
537 int max_mv = 0;
538 int near_same_nearest;
539 uint8_t *src_y_ptr = x->plane[0].src.buf;
540 uint8_t *ref_y_ptr;
541 const int num_mv_refs =
542 MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);
543
544 MV pred_mv[3];
545 pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
546 pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
547 pred_mv[2] = x->pred_mv[ref_frame];
548 assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
549
550 near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
551 x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
552
553 // Get the sad for each candidate reference mv.
554 for (i = 0; i < num_mv_refs; ++i) {
555 const MV *this_mv = &pred_mv[i];
556 int fp_row, fp_col;
557 if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
558 if (i == 1 && near_same_nearest) continue;
559 fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
560 fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
561 max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
562
563 if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
564 zero_seen |= (fp_row == 0 && fp_col == 0);
565
566 ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
567 // Find sad for current vector.
568 this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
569 ref_y_ptr, ref_y_stride);
570 // Note if it is the best so far.
571 if (this_sad < best_sad) {
572 best_sad = this_sad;
573 best_index = i;
574 }
575 }
576
577 // Note the index of the mv that worked best in the reference list.
578 x->mv_best_ref_index[ref_frame] = best_index;
579 x->max_mv_context[ref_frame] = max_mv;
580 x->pred_mv_sad[ref_frame] = best_sad;
581 }
582
583 void vp9_setup_pred_block(const MACROBLOCKD *xd,
584 struct buf_2d dst[MAX_MB_PLANE],
585 const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
586 const struct scale_factors *scale,
587 const struct scale_factors *scale_uv) {
588 int i;
589
590 dst[0].buf = src->y_buffer;
591 dst[0].stride = src->y_stride;
592 dst[1].buf = src->u_buffer;
593 dst[2].buf = src->v_buffer;
594 dst[1].stride = dst[2].stride = src->uv_stride;
595
596 for (i = 0; i < MAX_MB_PLANE; ++i) {
597 setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
598 i ? scale_uv : scale, xd->plane[i].subsampling_x,
599 xd->plane[i].subsampling_y);
600 }
601 }
602
603 int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
604 int stride) {
605 const int bw = eb_vp9_b_width_log2_lookup[plane_bsize];
606 const int y = 4 * (raster_block >> bw);
607 const int x = 4 * (raster_block & ((1 << bw) - 1));
608 return y * stride + x;
609 }
610
611 int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
612 int16_t *base) {
613 const int stride = 4 * eb_vp9_num_4x4_blocks_wide_lookup[plane_bsize];
614 return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
615 }
616
617 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
618 int ref_frame) {
619 const VP9_COMMON *const cm = &cpi->common;
620 const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
621 const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
622 return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
623 ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
624 : NULL;
625 }
626
627 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
628 const ModeInfo *const mi = xd->mi[0];
629 const int ctx = get_pred_context_switchable_interp(xd);
630 return SWITCHABLE_INTERP_RATE_FACTOR *
631 cpi->switchable_interp_costs[ctx][mi->interp_filter];
632 }
633
634 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
635 int i;
636 RD_OPT *const rd = &cpi->rd;
637 SPEED_FEATURES *const sf = &cpi->sf;
638
639 // Set baseline threshold values.
640 for (i = 0; i < MAX_MODES; ++i)
641 rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
642
643 if (sf->adaptive_rd_thresh) {
644 rd->thresh_mult[THR_NEARESTMV] = 300;
645 rd->thresh_mult[THR_NEARESTG] = 300;
646 rd->thresh_mult[THR_NEARESTA] = 300;
647 } else {
648 rd->thresh_mult[THR_NEARESTMV] = 0;
649 rd->thresh_mult[THR_NEARESTG] = 0;
650 rd->thresh_mult[THR_NEARESTA] = 0;
651 }
652
653 rd->thresh_mult[THR_DC] += 1000;
654
655 rd->thresh_mult[THR_NEWMV] += 1000;
656 rd->thresh_mult[THR_NEWA] += 1000;
657 rd->thresh_mult[THR_NEWG] += 1000;
658
659 rd->thresh_mult[THR_NEARMV] += 1000;
660 rd->thresh_mult[THR_NEARA] += 1000;
661 rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
662 rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
663
664 rd->thresh_mult[THR_TM] += 1000;
665
666 rd->thresh_mult[THR_COMP_NEARLA] += 1500;
667 rd->thresh_mult[THR_COMP_NEWLA] += 2000;
668 rd->thresh_mult[THR_NEARG] += 1000;
669 rd->thresh_mult[THR_COMP_NEARGA] += 1500;
670 rd->thresh_mult[THR_COMP_NEWGA] += 2000;
671
672 rd->thresh_mult[THR_ZEROMV] += 2000;
673 rd->thresh_mult[THR_ZEROG] += 2000;
674 rd->thresh_mult[THR_ZEROA] += 2000;
675 rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
676 rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
677
678 rd->thresh_mult[THR_H_PRED] += 2000;
679 rd->thresh_mult[THR_V_PRED] += 2000;
680 rd->thresh_mult[THR_D45_PRED] += 2500;
681 rd->thresh_mult[THR_D135_PRED] += 2500;
682 rd->thresh_mult[THR_D117_PRED] += 2500;
683 rd->thresh_mult[THR_D153_PRED] += 2500;
684 rd->thresh_mult[THR_D207_PRED] += 2500;
685 rd->thresh_mult[THR_D63_PRED] += 2500;
686 }
687
688 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
689 static const int thresh_mult[2][MAX_REFS] = {
690 { 2500, 2500, 2500, 4500, 4500, 2500 },
691 { 2000, 2000, 2000, 4000, 4000, 2000 }
692 };
693 RD_OPT *const rd = &cpi->rd;
694 const int idx = cpi->oxcf.mode == BEST;
695 memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
696 }
697
698 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
699 int bsize, int best_mode_index) {
700 if (rd_thresh > 0) {
701 const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
702 int mode;
703 for (mode = 0; mode < top_mode; ++mode) {
704 const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
705 const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
706 BLOCK_SIZE bs;
707 for (bs = min_size; bs <= max_size; ++bs) {
708 int *const fact = &factor_buf[bs][mode];
709 if (mode == best_mode_index) {
710 *fact -= (*fact >> 4);
711 } else {
712 *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
713 }
714 }
715 }
716 }
717 }
718 #endif
#if INTER_INTRA_BIAS
// Rate penalty charged against intra modes during inter-frame mode decision.
// This build replaces the libvpx noise-estimate gate with an explicit
// is_flat_noise flag supplied by the caller.
#if 0
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
#else
int vp9_get_intra_cost_penalty(BLOCK_SIZE bsize, int qindex, int qdelta, int is_flat_noise) {
#endif
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

#if 0
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;
#else
  if (is_flat_noise)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;
#endif
  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by eb_vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * eb_vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
#endif
748