1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdint.h>
14 
15 #include "vpx_dsp_common.h"
16 //#include "vpx_mem.h"
17 #include "bitops.h"
18 #include "mem.h"
19 //#include "system_state.h"
20 
21 #include "vp9_entropy.h"
22 #include "vp9_entropymode.h"
23 #include "EbUtility.h"
24 #include "vp9_pred_common.h"
25 #include "vp9_quant_common.h"
26 
27 #include "vp9_cost.h"
28 //#include "vp9_encodemb.h"
29 #include "vp9_encodemv.h"
30 #include "vp9_encoder.h"
31 //#include "vp9_mcomp.h"
32 #include "vp9_ratectrl.h"
33 #include "vp9_rd.h"
34 #include "vp9_tokenize.h"
35 
36 #define RD_THRESH_POW 1.25
37 
38 // Factor to weigh the rate for switchable interp filters.
39 #define SWITCHABLE_INTERP_RATE_FACTOR 1
40 
// Marks an RD cost record as "no valid candidate yet": every field is
// saturated to its maximum so that any real candidate compares as cheaper.
void eb_vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
46 
// Zeroes an RD cost record so rate/distortion can be accumulated into it.
void eb_vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
52 
// Precomputes, from the current frame context, the bit-cost tables used by
// mode decision: keyframe and inter intra y-mode costs, uv-mode costs,
// switchable interpolation filter costs, and transform-size costs.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int i, j;

  // Keyframe y-mode costs are conditioned on the (above, left) mode pair.
  for (i = 0; i < INTRA_MODES; ++i) {
    for (j = 0; j < INTRA_MODES; ++j) {
      eb_vp9_cost_tokens(cpi->y_mode_costs[i][j], eb_vp9_kf_y_mode_prob[i][j],
                      eb_vp9_intra_mode_tree);
    }
  }

  eb_vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], eb_vp9_intra_mode_tree);
  // UV-mode costs are conditioned on the co-located y mode; keyframes and
  // inter frames use separate probability tables.
  for (i = 0; i < INTRA_MODES; ++i) {
    eb_vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i],
                    eb_vp9_kf_uv_mode_prob[i], eb_vp9_intra_mode_tree);
    eb_vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i],
                    fc->uv_mode_prob[i], eb_vp9_intra_mode_tree);
  }

  for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
    eb_vp9_cost_tokens(cpi->switchable_interp_costs[i],
                    fc->switchable_interp_prob[i], eb_vp9_switchable_interp_tree);
  }

  // Tx-size costs: for each max size i and each selectable size k <= i, sum
  // the costs of the binary decisions along the tx_probs array.  tx_probs is
  // a flat probability list (not a tree), hence the manual zero/one cost
  // accumulation; the last decision is skipped when k == i (m <= k - (k==i)).
  for (i = TX_8X8; i < TX_SIZES; ++i) {
    for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
      const vpx_prob *tx_probs = get_tx_probs((TX_SIZE)i, j, &fc->tx_probs);
      int k;
      for (k = 0; k <= i; ++k) {
        int cost = 0;
        int m;
        for (m = 0; m <= k - (k == i); ++m) {
          if (m == k)
            cost += vp9_cost_zero(tx_probs[m]);
          else
            cost += vp9_cost_one(tx_probs[m]);
        }
        cpi->tx_size_cost[i - 1][j][k] = cost;
      }
    }
  }
}
95 
// Builds the coefficient-token cost tables from model probabilities: for
// every tx size / plane type / ref type / band / context, expand the model
// probabilities to the full set and derive both the normal token costs and
// the "skip EOB" variant.
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  int i, j, k, l;
  TX_SIZE t;
  for (t = TX_4X4; t <= TX_32X32; ++t)
    for (i = 0; i < PLANE_TYPES; ++i)
      for (j = 0; j < REF_TYPES; ++j)
        for (k = 0; k < COEF_BANDS; ++k)
          for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) {
            vpx_prob probs[ENTROPY_NODES];
            eb_vp9_model_to_full_probs(p[t][i][j][k][l], probs);
            eb_vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, eb_vp9_coef_tree);
            eb_vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs,
                                 eb_vp9_coef_tree);
            // The EOB token must cost the same in both tables.
            assert(c[t][i][j][k][0][l][EOB_TOKEN] ==
                   c[t][i][j][k][1][l][EOB_TOKEN]);
          }
}
114 
115 // Values are now correlated to quantizer.
116 static int sad_per_bit16lut_8[QINDEX_RANGE];
117 static int sad_per_bit4lut_8[QINDEX_RANGE];
118 
119 #if CONFIG_VP9_HIGHBITDEPTH
120 static int sad_per_bit16lut_10[QINDEX_RANGE];
121 static int sad_per_bit4lut_10[QINDEX_RANGE];
122 static int sad_per_bit16lut_12[QINDEX_RANGE];
123 static int sad_per_bit4lut_12[QINDEX_RANGE];
124 #endif
125 
// Fills one pair of SAD-per-bit lookup tables (16x16 and 4x4) for a single
// bit depth.  Values come from a simple linear fit on the quantizer step so
// that experimental quantizer-table changes propagate automatically.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int q_index;
  for (q_index = 0; q_index < range; q_index++) {
    const double q_step = eb_vp9_convert_qindex_to_q(q_index, bit_depth);
    bit16lut[q_index] = (int)(0.0418 * q_step + 2.4107);
    bit4lut[q_index] = (int)(0.063 * q_step + 2.742);
  }
}
138 
// One-time initialization of all SAD-per-bit lookup tables, one pair per
// supported bit depth (8-bit always; 10/12-bit only in high-bitdepth builds).
void eb_vp9_init_me_luts(void) {
  init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
                  VPX_BITS_8);
#if CONFIG_VP9_HIGHBITDEPTH
  init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
                  VPX_BITS_10);
  init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
                  VPX_BITS_12);
#endif
}
149 
150 // Note that the element below for frame type "USE_BUF_FRAME", which indicates
151 // that the show frame flag is set, should not be used as no real frame
152 // is encoded so we should not reach here. However, a dummy value
153 // is inserted here to make sure the data structure has the right number
154 // of values assigned.
155 #if 0
156 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
157                                                               128, 144, 144 };
158 #endif
// Returns the RD multiplier (lambda) implied by qindex: proportional to the
// square of the DC quantizer step (88 * q^2 / 24).  For 10/12-bit depths the
// quantizer step is 4x/16x larger, so the squared term is shifted back by
// 4/8 bits to stay on the 8-bit scale.
int64_t eb_vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
  const int64_t q = eb_vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
  int64_t rdmult = 0;
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
      break;
  }
#else
  int64_t rdmult = 88 * q * q / 24;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  return rdmult;
}
176 
// RD multiplier for the given qindex, clamped to be strictly positive so
// that RD comparisons never see a zero lambda.  (The upstream two-pass
// frame-type/boost scaling is not used in this build.)
int eb_vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  int64_t rdmult = eb_vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
  if (rdmult < 1) rdmult = 1;
  return (int)rdmult;
}
192 
// Like eb_vp9_compute_rd_mult_based_on_qindex() but divides the quadratic
// term by beta, letting the caller scale lambda adaptively; uses the frame's
// base_qindex and clamps the result to be strictly positive.
int eb_vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
  const VP9_COMMON *cm = &cpi->common;
  int64_t q = eb_vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth);

#if CONFIG_VP9_HIGHBITDEPTH
  int64_t rdmult = 0;
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
    // 10/12-bit quantizer steps are 4x/16x larger; shift the squared term
    // back by 4/8 bits to match the 8-bit scale.
    case VPX_BITS_10:
      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
      break;
  }
#else
  int64_t rdmult = (int)((88 * q * q / beta) / 24);
#endif  // CONFIG_VP9_HIGHBITDEPTH
#if 0
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));

    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
  }
#endif
  if (rdmult < 1) rdmult = 1;  // never let lambda collapse to zero
  return (int)rdmult;
}
225 
// Loads the per-qindex SAD-per-bit constants (for 16x16 and 4x4 motion
// estimation) into the macroblock context, selecting the lookup table that
// matches the active bit depth.
void eb_vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      x->sadperbit16 = sad_per_bit16lut_8[qindex];
      x->sadperbit4 = sad_per_bit4lut_8[qindex];
      break;
    case VPX_BITS_10:
      x->sadperbit16 = sad_per_bit16lut_10[qindex];
      x->sadperbit4 = sad_per_bit4lut_10[qindex];
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      x->sadperbit16 = sad_per_bit16lut_12[qindex];
      x->sadperbit4 = sad_per_bit4lut_12[qindex];
      break;
  }
#else
  (void)cpi;
  x->sadperbit16 = sad_per_bit16lut_8[qindex];
  x->sadperbit4 = sad_per_bit4lut_8[qindex];
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
249 #if 0
250 static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
251   int i, bsize, segment_id;
252 
253   for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) {
254     const int qindex =
255         clamp(eb_vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) +
256                   cm->y_dc_delta_q,
257               0, MAXQ);
258     const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);
259 
260     for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
261       // Threshold here seems unnecessarily harsh but fine given actual
262       // range of values used for cpi->sf.thresh_mult[].
263       const int t = q * rd_thresh_block_size_factor[bsize];
264       const int thresh_max = INT_MAX / t;
265 
266       if (bsize >= BLOCK_8X8) {
267         for (i = 0; i < MAX_MODES; ++i)
268           rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max
269                                                    ? rd->thresh_mult[i] * t / 4
270                                                    : INT_MAX;
271       } else {
272         for (i = 0; i < MAX_REFS; ++i)
273           rd->threshes[segment_id][bsize][i] =
274               rd->thresh_mult_sub8x8[i] < thresh_max
275                   ? rd->thresh_mult_sub8x8[i] * t / 4
276                   : INT_MAX;
277       }
278     }
279   }
280 }
281 #endif
282 
283 #if 1
// Per-frame setup of the rate-distortion machinery: RDMULT/RDDIV and the
// error-per-bit derived from them, the tx-size search mode, partition
// probabilities, and all token/partition/mode cost tables (plus MV cost
// tables on inter frames).
void eb_vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int i;
#if 0
  vpx_clear_system_state();
#endif
  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = eb_vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);
#if VP9_RD
  // SAD-domain lambda: square root of the SSE-domain multiplier, rescaled
  // by 128 and kept >= 1.
  rd->rd_mult_sad = (int)MAX(round(sqrtf((float)rd->RDMULT / 128) * 128),1);

#endif
  set_error_per_bit(x, rd->RDMULT);

  // 1 (search tx size) on keyframes, 0 (use largest) otherwise.
  x->select_tx_size = (/*cpi->sf.tx_size_search_method == USE_LARGESTALL &&*/
                       cm->frame_type != KEY_FRAME)
                          ? 0
                          : 1;
#if 0
  set_block_thresholds(cm, rd);
#endif
  set_partition_probs(cm, xd);
#if 0
  if (cpi->oxcf.pass == 1) {
    if (!frame_is_intra_only(cm))
      eb_vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
  } else
#endif
  {
    //if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME)
      fill_token_costs(x->token_costs, cm->fc->coef_probs);

    //if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
    //    cm->frame_type == KEY_FRAME)
    {
      for (i = 0; i < PARTITION_CONTEXTS; ++i)
        eb_vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i),
                        eb_vp9_partition_tree);
    }

    //if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 ||
    //    cm->frame_type == KEY_FRAME)
    {
      fill_mode_costs(cpi);

      // MV costs are only needed (and only valid) on inter frames.
      if (!frame_is_intra_only(cm)) {
        eb_vp9_build_nmv_cost_table(
            x->nmvjointcost,
            cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
            &cm->fc->nmvc, cm->allow_high_precision_mv);

        for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
          eb_vp9_cost_tokens((int *)cpi->inter_mode_cost[i],
                          cm->fc->inter_mode_probs[i], eb_vp9_inter_mode_tree);
      }
    }
  }
}
348 #endif
349 // NOTE: The tables below must be of the same size.
350 
351 // The functions described below are sampled at the four most significant
352 // bits of x^2 + 8 / 256.
353 
354 // Normalized rate:
355 // This table models the rate for a Laplacian source with given variance
356 // when quantized with a uniform quantizer with given stepsize. The
357 // closed form expression is:
358 // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
359 // where r = exp(-sqrt(2) * x) and x = qp_step / sqrt(variance),
360 // and H(x) is the binary entropy function.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};

// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qp_step / sqrt(variance).
// Note the actual distortion is Dn * variance.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Sample points (Q10) at which the two tables above are tabulated, where
// xsq = (qp_step^2 / variance); the spacing matches the 4-most-significant-
// bit sampling described at the top of this section, so model_rd_norm() can
// recover the table index from the exponent and next three bits of the
// input.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
403 
// Looks up and linearly interpolates the normalized rate and distortion
// (both Q10) for xsq_q10 = qp_step^2 / variance (Q10).
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // The xsq_iq_q10 sample points follow the 4 MSBs of (x^2 + 8) / 256, so
  // the table index is reconstructed from the exponent k and the next
  // three bits of tmp.
  const int tmp = (xsq_q10 >> 2) + 8;
  const int k = get_msb(tmp) - 3;
  const int xq = (k << 3) + ((tmp >> k) & 0x7);
  const int one_q10 = 1 << 10;
  // Fractional (Q10) position of xsq_q10 between samples xq and xq + 1.
  const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
  const int b_q10 = one_q10 - a_q10;
  *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
  *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
}
414 
// Vector form of model_rd_norm(): interpolates normalized rate/distortion
// (Q10) for each of the MAX_MB_PLANE inputs.
// NOTE(review): this duplicates the per-element math of model_rd_norm();
// if either copy is changed, keep the other in sync.
static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
                              int r_q10[MAX_MB_PLANE],
                              int d_q10[MAX_MB_PLANE]) {
  int i;
  const int one_q10 = 1 << 10;
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // Recover the table index from the exponent and next 3 bits (see the
    // xsq_iq_q10 sampling scheme), then interpolate between neighbors.
    const int tmp = (xsq_q10[i] >> 2) + 8;
    const int k = get_msb(tmp) - 3;
    const int xq = (k << 3) + ((tmp >> k) & 0x7);
    const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
    const int b_q10 = one_q10 - a_q10;
    r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
    d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
  }
}
430 
431 static const uint32_t MAX_XSQ_Q10 = 245727;
432 
// Models rate (in VP9 cost units) and distortion (absolute, not normalized)
// for a block of 2^n_log2 samples of a Laplacian source with the given
// variance quantized with step qstep.  A zero-variance block costs nothing.
void eb_vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  // This function models the rate and distortion for a Laplacian
  // source with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expressions are in:
  // Hang and Chen, "Source Model for transform video coder and its
  // application - Part I: Fundamental Theory", IEEE Trans. Circ.
  // Sys. for Video Tech., April 1997.
  if (var == 0) {
    *rate = 0;
    *dist = 0;
  } else {
    int d_q10, r_q10;
    // xsq = qstep^2 * n / var in Q10, rounded, clamped to the table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
    const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
    model_rd_norm(xsq_q10, &r_q10, &d_q10);
    // Scale normalized rate by block size and convert Q10 -> cost units;
    // denormalize distortion by the variance (rounded).
    *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
    *dist = (var * (int64_t)d_q10 + 512) >> 10;
  }
}
455 
456 // Implements a fixed length vector form of eb_vp9_model_rd_from_var_lapndz where
457 // vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
// Vector form of eb_vp9_model_rd_from_var_lapndz for MAX_MB_PLANE planes.
// All var[] entries must be non-zero (no zero-variance shortcut here), and
// the results are ACCUMULATED into *rate_sum / *dist_sum, which the caller
// must initialize.
void eb_vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum) {
  int i;
  int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    // xsq = qstep^2 * n / var in Q10, rounded, clamped to the table range.
    const uint64_t xsq_q10_64 =
        (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
        var[i];
    xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
  }
  model_rd_norm_vec(xsq_q10, r_q10, d_q10);
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    int rate =
        ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
    int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
    *rate_sum += rate;
    *dist_sum += dist;
  }
}
479 
// Gathers the above/left entropy contexts for one plane at the granularity
// of the given transform size: one 0/1 flag per tx block, obtained by
// OR-ing together the 4x4 contexts it covers.
// NOTE(review): the >4x4 cases read 2/4/8 ENTROPY_CONTEXT bytes at once via
// uint16/32/64 loads (type punning, matching upstream libvpx); !! collapses
// the combined bytes to a single flag.
void eb_vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = eb_vp9_num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = eb_vp9_num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      // 4x4: contexts map one-to-one, plain copies suffice.
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8:
        for (i = 0; i < num_4x4_w; i += 2) {
            assert(i < 16);
            t_above[i] = !!*(const uint16_t *)&above[i];
        }
        for (i = 0; i < num_4x4_h; i += 2) {
            assert(i < 16);
            t_left[i] = !!*(const uint16_t *)&left[i];
        }
      break;
    case TX_16X16:
        for (i = 0; i < num_4x4_w; i += 4) {
            assert(i < 16);
            t_above[i] = !!*(const uint32_t *)&above[i];
        }
        for (i = 0; i < num_4x4_h; i += 4) {
            assert(i < 16);
            t_left[i] = !!*(const uint32_t *)&left[i];
        }
      break;
    default:
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8) {
          assert(i < 16);
          t_above[i] = !!*(const uint64_t *)&above[i];
      }
      for (i = 0; i < num_4x4_h; i += 8) {
          assert(i < 16);
          t_left[i] = !!*(const uint64_t *)&left[i];
      }
      break;
  }
}
529 #if 0
530 void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
531                  int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
532   int i;
533   int zero_seen = 0;
534   int best_index = 0;
535   int best_sad = INT_MAX;
536   int this_sad = INT_MAX;
537   int max_mv = 0;
538   int near_same_nearest;
539   uint8_t *src_y_ptr = x->plane[0].src.buf;
540   uint8_t *ref_y_ptr;
541   const int num_mv_refs =
542       MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);
543 
544   MV pred_mv[3];
545   pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
546   pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
547   pred_mv[2] = x->pred_mv[ref_frame];
548   assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0])));
549 
550   near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
551                       x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
552 
553   // Get the sad for each candidate reference mv.
554   for (i = 0; i < num_mv_refs; ++i) {
555     const MV *this_mv = &pred_mv[i];
556     int fp_row, fp_col;
557     if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
558     if (i == 1 && near_same_nearest) continue;
559     fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
560     fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
561     max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3);
562 
563     if (fp_row == 0 && fp_col == 0 && zero_seen) continue;
564     zero_seen |= (fp_row == 0 && fp_col == 0);
565 
566     ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
567     // Find sad for current vector.
568     this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
569                                            ref_y_ptr, ref_y_stride);
570     // Note if it is the best so far.
571     if (this_sad < best_sad) {
572       best_sad = this_sad;
573       best_index = i;
574     }
575   }
576 
577   // Note the index of the mv that worked best in the reference list.
578   x->mv_best_ref_index[ref_frame] = best_index;
579   x->max_mv_context[ref_frame] = max_mv;
580   x->pred_mv_sad[ref_frame] = best_sad;
581 }
582 
583 void vp9_setup_pred_block(const MACROBLOCKD *xd,
584                           struct buf_2d dst[MAX_MB_PLANE],
585                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
586                           const struct scale_factors *scale,
587                           const struct scale_factors *scale_uv) {
588   int i;
589 
590   dst[0].buf = src->y_buffer;
591   dst[0].stride = src->y_stride;
592   dst[1].buf = src->u_buffer;
593   dst[2].buf = src->v_buffer;
594   dst[1].stride = dst[2].stride = src->uv_stride;
595 
596   for (i = 0; i < MAX_MB_PLANE; ++i) {
597     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
598                      i ? scale_uv : scale, xd->plane[i].subsampling_x,
599                      xd->plane[i].subsampling_y);
600   }
601 }
602 
603 int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
604                             int stride) {
605   const int bw = eb_vp9_b_width_log2_lookup[plane_bsize];
606   const int y = 4 * (raster_block >> bw);
607   const int x = 4 * (raster_block & ((1 << bw) - 1));
608   return y * stride + x;
609 }
610 
611 int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
612                                        int16_t *base) {
613   const int stride = 4 * eb_vp9_num_4x4_blocks_wide_lookup[plane_bsize];
614   return base + vp9_raster_block_offset(plane_bsize, raster_block, stride);
615 }
616 
617 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
618                                              int ref_frame) {
619   const VP9_COMMON *const cm = &cpi->common;
620   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
621   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
622   return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
623              ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
624              : NULL;
625 }
626 
627 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
628   const ModeInfo *const mi = xd->mi[0];
629   const int ctx = get_pred_context_switchable_interp(xd);
630   return SWITCHABLE_INTERP_RATE_FACTOR *
631          cpi->switchable_interp_costs[ctx][mi->interp_filter];
632 }
633 
634 void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
635   int i;
636   RD_OPT *const rd = &cpi->rd;
637   SPEED_FEATURES *const sf = &cpi->sf;
638 
639   // Set baseline threshold values.
640   for (i = 0; i < MAX_MODES; ++i)
641     rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? -500 : 0;
642 
643   if (sf->adaptive_rd_thresh) {
644     rd->thresh_mult[THR_NEARESTMV] = 300;
645     rd->thresh_mult[THR_NEARESTG] = 300;
646     rd->thresh_mult[THR_NEARESTA] = 300;
647   } else {
648     rd->thresh_mult[THR_NEARESTMV] = 0;
649     rd->thresh_mult[THR_NEARESTG] = 0;
650     rd->thresh_mult[THR_NEARESTA] = 0;
651   }
652 
653   rd->thresh_mult[THR_DC] += 1000;
654 
655   rd->thresh_mult[THR_NEWMV] += 1000;
656   rd->thresh_mult[THR_NEWA] += 1000;
657   rd->thresh_mult[THR_NEWG] += 1000;
658 
659   rd->thresh_mult[THR_NEARMV] += 1000;
660   rd->thresh_mult[THR_NEARA] += 1000;
661   rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
662   rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
663 
664   rd->thresh_mult[THR_TM] += 1000;
665 
666   rd->thresh_mult[THR_COMP_NEARLA] += 1500;
667   rd->thresh_mult[THR_COMP_NEWLA] += 2000;
668   rd->thresh_mult[THR_NEARG] += 1000;
669   rd->thresh_mult[THR_COMP_NEARGA] += 1500;
670   rd->thresh_mult[THR_COMP_NEWGA] += 2000;
671 
672   rd->thresh_mult[THR_ZEROMV] += 2000;
673   rd->thresh_mult[THR_ZEROG] += 2000;
674   rd->thresh_mult[THR_ZEROA] += 2000;
675   rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
676   rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
677 
678   rd->thresh_mult[THR_H_PRED] += 2000;
679   rd->thresh_mult[THR_V_PRED] += 2000;
680   rd->thresh_mult[THR_D45_PRED] += 2500;
681   rd->thresh_mult[THR_D135_PRED] += 2500;
682   rd->thresh_mult[THR_D117_PRED] += 2500;
683   rd->thresh_mult[THR_D153_PRED] += 2500;
684   rd->thresh_mult[THR_D207_PRED] += 2500;
685   rd->thresh_mult[THR_D63_PRED] += 2500;
686 }
687 
688 void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
689   static const int thresh_mult[2][MAX_REFS] = {
690     { 2500, 2500, 2500, 4500, 4500, 2500 },
691     { 2000, 2000, 2000, 4000, 4000, 2000 }
692   };
693   RD_OPT *const rd = &cpi->rd;
694   const int idx = cpi->oxcf.mode == BEST;
695   memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
696 }
697 
698 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
699                                int bsize, int best_mode_index) {
700   if (rd_thresh > 0) {
701     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
702     int mode;
703     for (mode = 0; mode < top_mode; ++mode) {
704       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
705       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
706       BLOCK_SIZE bs;
707       for (bs = min_size; bs <= max_size; ++bs) {
708         int *const fact = &factor_buf[bs][mode];
709         if (mode == best_mode_index) {
710           *fact -= (*fact >> 4);
711         } else {
712           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
713         }
714       }
715     }
716   }
717 }
718 #endif
#if INTER_INTRA_BIAS
#if 0
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
    int qindex, int qdelta) {
#else
// Rate penalty applied to intra modes during inter-frame mode decision:
// 20x the 8-bit DC quantizer step, reduced for small blocks (<=16x16)
// unless the source is flagged as flat noise.
int vp9_get_intra_cost_penalty(BLOCK_SIZE bsize, int qindex, int qdelta, int is_flat_noise) {
#endif
  // Reduce the intra cost penalty for small blocks (<=16x16):
  // right-shift by 4 for <=8x8, by 2 for 16x16, none for larger.
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;

#if 0
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;
#else
  if (is_flat_noise)
      // Don't reduce intra cost penalty if estimated noise level is high.
      reduction_fac = 0;
#endif
  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // retured by eb_vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * eb_vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
#endif
748