1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_RD_H_
13 #define AOM_AV1_ENCODER_RD_H_
14 
15 #include <limits.h>
16 
17 #include "av1/common/blockd.h"
18 
19 #include "av1/encoder/block.h"
20 #include "av1/encoder/context_tree.h"
21 #include "av1/encoder/cost.h"
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
// Distortion is scaled by 2^RDDIV_BITS (i.e. multiplied by 128) inside
// RDCOST() and RDCOST_DBL() below.
#define RDDIV_BITS 7
// Right shift applied to the rd multiplier by set_error_per_bit() to derive
// the per-bit error value.
#define RD_EPB_SHIFT 6

// Integer rate-distortion cost of rate R and distortion D under multiplier RM:
//   ROUND_POWER_OF_TWO(R * RM, AV1_PROB_COST_SHIFT) + D * 2^RDDIV_BITS
// R is widened to int64_t before the multiply. D is used with whatever type
// the caller passes, so the D * 128 product is only 64-bit if D itself is.
// NOTE(review): ROUND_POWER_OF_TWO and AV1_PROB_COST_SHIFT are defined outside
// this header; presumably a rounding right shift -- confirm.
#define RDCOST(RM, R, D)                                            \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
   ((D) * (1 << RDDIV_BITS)))

// Floating-point counterpart of RDCOST(): the rate term is computed in double
// and divided by 2^AV1_PROB_COST_SHIFT (no integer rounding step), then the
// distortion converted to double and scaled by 2^RDDIV_BITS is added.
#define RDCOST_DBL(RM, R, D)                                       \
  (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
   ((double)(D) * (1 << RDDIV_BITS)))

// The tuning constants below are not referenced anywhere in this header; their
// users live in other files.
#define QIDX_SKIP_THRESH 115

#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120

#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
48 
// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code.
//
// No enumerator before MAX_MODES has an explicit value, so each one's value is
// its position in the list (THR_NEARESTMV == 0). Inserting, removing or
// reordering entries therefore changes every following index.
//
// NOTE(review): the name suffixes appear to tag reference frames in the same
// way as THR_MODES_SUB8X8 (L2/L3 ~ LAST2/LAST3, B ~ BWDR, A2 ~ ALTR2,
// A ~ ALTR, G ~ GOLD; the *MV entries having no suffix) -- confirm against
// av1_mode_order.
typedef enum {
  // Single-reference entries: indices 0..27, four groups of seven
  // (NEAREST, NEW, NEAR, GLOBAL).
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound entries: indices 28..155. First the sixteen NEAREST_NEAREST
  // entries, one per suffix pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Then, for each of the same sixteen suffix pairs, a group of seven entries
  // in the fixed order NEAR_NEAR, NEW_NEAREST, NEAREST_NEW, NEW_NEAR,
  // NEAR_NEW, NEW_NEW, GLOBAL_GLOBAL.
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra prediction entries: indices 156..168.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,

  // Total number of entries above (169); used as an array dimension.
  MAX_MODES,

  // Range markers. These alias existing indices rather than adding entries:
  // LAST_* is the index of the final entry of a section and MAX_* is one past
  // it. MAX_COMP_REF_MODES therefore equals THR_DC; it is an end index, not
  // the number of compound entries.
  LAST_SINGLE_REF_MODES = THR_GLOBALG,
  MAX_SINGLE_REF_MODES = LAST_SINGLE_REF_MODES + 1,
  LAST_COMP_REF_MODES = THR_COMP_GLOBAL_GLOBALBA,
  MAX_COMP_REF_MODES = LAST_COMP_REF_MODES + 1
} THR_MODES;
250 
// Index type for the per-reference tables used with sub-8x8 thresholds (see
// RD_OPT::thresh_mult_sub8x8, which is sized by MAX_REFS). Values are implicit
// and start at 0, so the order of the list defines each index.
typedef enum {
  // Single-reference entries: indices 0..6.
  THR_LAST,
  THR_LAST2,
  THR_LAST3,
  THR_BWDR,
  THR_ALTR2,
  THR_GOLD,
  THR_ALTR,

  // Compound entries: indices 7..18, three groups of four.
  // NOTE(review): suffixes look like pairs of the single references above
  // (e.g. LA ~ LAST + ALTR) -- confirm against the code that fills the table.
  THR_COMP_LA,
  THR_COMP_L2A,
  THR_COMP_L3A,
  THR_COMP_GA,

  THR_COMP_LB,
  THR_COMP_L2B,
  THR_COMP_L3B,
  THR_COMP_GB,

  THR_COMP_LA2,
  THR_COMP_L2A2,
  THR_COMP_L3A2,
  THR_COMP_GA2,

  // Single intra entry: index 19.
  THR_INTRA,

  // Number of entries above (20); used as an array dimension.
  MAX_REFS
} THR_MODES_SUB8X8;
279 
// Rate-distortion threshold tables and the rd multiplier.
typedef struct RD_OPT {
  // Thresh_mult is used to set a threshold for the rd score. A higher value
  // means that we will accept the best mode so far more often. This number
  // is used in combination with the current block size, and thresh_freq_fact
  // to pick a threshold.
  // One entry per THR_MODES index.
  int thresh_mult[MAX_MODES];
  // One entry per THR_MODES_SUB8X8 index.
  int thresh_mult_sub8x8[MAX_REFS];

  // Thresholds dimensioned [MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES].
  // NOTE(review): presumably indexed by segment id, block size and THR_MODES
  // index in that order -- confirm where the table is filled.
  int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];

  // Dimensioned [REF_FRAMES][REFERENCE_MODES]; both constants are defined
  // outside this header.
  int64_t prediction_type_threshes[REF_FRAMES][REFERENCE_MODES];

  // NOTE(review): presumably the multiplier passed as RM to RDCOST() --
  // confirm against the callers.
  int RDMULT;
} RD_OPT;
294 
av1_init_rd_stats(RD_STATS * rd_stats)295 static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
296 #if CONFIG_RD_DEBUG
297   int plane;
298 #endif
299   rd_stats->rate = 0;
300   rd_stats->dist = 0;
301   rd_stats->rdcost = 0;
302   rd_stats->sse = 0;
303   rd_stats->skip = 1;
304   rd_stats->zero_rate = 0;
305   rd_stats->invalid_rate = 0;
306   rd_stats->ref_rdcost = INT64_MAX;
307 #if CONFIG_RD_DEBUG
308   // This may run into problems when monochrome video is
309   // encoded, as there will only be 1 plane
310   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
311     rd_stats->txb_coeff_cost[plane] = 0;
312     {
313       int r, c;
314       for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
315         for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
316           rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
317     }
318   }
319 #endif
320 }
321 
av1_invalid_rd_stats(RD_STATS * rd_stats)322 static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
323 #if CONFIG_RD_DEBUG
324   int plane;
325 #endif
326   rd_stats->rate = INT_MAX;
327   rd_stats->dist = INT64_MAX;
328   rd_stats->rdcost = INT64_MAX;
329   rd_stats->sse = INT64_MAX;
330   rd_stats->skip = 0;
331   rd_stats->zero_rate = 0;
332   rd_stats->invalid_rate = 1;
333   rd_stats->ref_rdcost = INT64_MAX;
334 #if CONFIG_RD_DEBUG
335   // This may run into problems when monochrome video is
336   // encoded, as there will only be 1 plane
337   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
338     rd_stats->txb_coeff_cost[plane] = INT_MAX;
339     {
340       int r, c;
341       for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
342         for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
343           rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX;
344     }
345   }
346 #endif
347 }
348 
av1_merge_rd_stats(RD_STATS * rd_stats_dst,const RD_STATS * rd_stats_src)349 static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
350                                       const RD_STATS *rd_stats_src) {
351 #if CONFIG_RD_DEBUG
352   int plane;
353 #endif
354   rd_stats_dst->rate += rd_stats_src->rate;
355   if (!rd_stats_dst->zero_rate)
356     rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
357   rd_stats_dst->dist += rd_stats_src->dist;
358   rd_stats_dst->sse += rd_stats_src->sse;
359   rd_stats_dst->skip &= rd_stats_src->skip;
360   rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate;
361 #if CONFIG_RD_DEBUG
362   // This may run into problems when monochrome video is
363   // encoded, as there will only be 1 plane
364   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
365     rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
366     {
367       // TODO(angiebird): optimize this part
368       int r, c;
369       int ref_txb_coeff_cost = 0;
370       for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
371         for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
372           rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
373               rd_stats_src->txb_coeff_cost_map[plane][r][c];
374           ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
375         }
376       assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
377     }
378   }
379 #endif
380 }
381 
// Forward declarations: the prototypes below only need pointers to these
// types, so their full definitions are not required here.
struct TileInfo;
struct TileDataEnc;
struct AV1_COMP;
struct macroblock;

// The functions below are declared here only; their definitions are not part
// of this header.

// NOTE(review): presumably returns the rd multiplier for the given qindex --
// confirm in the implementation.
int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex);

void av1_initialize_rd_consts(struct AV1_COMP *cpi);

void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
                              int qindex);

// Writes its results through the rate and dist out-pointers.
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
                                  unsigned int qstep, int *rate, int64_t *dist);

// Both model functions write their results through the rate_f and distbysse_f
// out-pointers.
void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f);
void av1_model_rd_surffit(double xm, double yl, double *rate_f,
                          double *distbysse_f);

int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x,
                            const MACROBLOCKD *xd);

int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride);

int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base);

YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi,
                                             int ref_frame);

void av1_init_me_luts(void);

void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx);

// t_above and t_left are declared with MAX_MIB_SIZE elements; as array
// parameters they are passed as pointers.
void av1_get_entropy_contexts(BLOCK_SIZE bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]);

void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi);

void av1_set_rd_speed_thresholds_sub8x8(struct AV1_COMP *cpi);

// fact points to rows of MAX_MODES ints, i.e. one entry per THR_MODES index in
// each row.
void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
                               int (*fact)[MAX_MODES], int rd_thresh, int bsize,
                               int best_mode_index);
429 
// Returns 1 when best_rd is below the scaled threshold
// (thresh * thresh_fact) >> 5, or when thresh is INT_MAX (in which case the
// comparison is irrelevant); returns 0 otherwise. The product is formed in
// 64 bits so it cannot overflow int.
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
                                      int thresh_fact) {
  if (thresh == INT_MAX) return 1;

  const int64_t scaled_thresh = ((int64_t)thresh * thresh_fact) >> 5;
  return best_rd < scaled_thresh;
}
434 
// Declared here only; the definition is not part of this header.
// ref_y_buffer is passed together with its row stride (ref_y_stride).
void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
                 BLOCK_SIZE block_size);
438 
set_error_per_bit(MACROBLOCK * x,int rdmult)439 static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
440   x->errorperbit = rdmult >> RD_EPB_SHIFT;
441   x->errorperbit += (x->errorperbit == 0);
442 }
443 
// The functions below are declared here only; their definitions are not part
// of this header.

// dst is declared with MAX_MB_PLANE elements (one struct buf_2d per plane);
// num_planes is passed separately.
void av1_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv,
                          const int num_planes);

int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth);

void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
                         FRAME_CONTEXT *fc);

void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
                          const int num_planes);
459 
460 #ifdef __cplusplus
461 }  // extern "C"
462 #endif
463 
464 #endif  // AOM_AV1_ENCODER_RD_H_
465