1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_BLOCK_H_
13 #define AOM_AV1_ENCODER_BLOCK_H_
14 
15 #include "av1/common/entropymv.h"
16 #include "av1/common/entropy.h"
17 #include "av1/common/mvref_common.h"
18 #include "av1/encoder/hash.h"
19 #if CONFIG_DIST_8X8
20 #include "aom/aomcx.h"
21 #endif
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
// Small statistics record with three fields: sse, sum and var.
// NOTE(review): the names suggest sum of squared error, sum and variance of a
// difference signal -- confirm against the code that fills this struct.
27 typedef struct {
28   unsigned int sse;
29   int sum;
30   unsigned int var;
31 } DIFF;
32 
// Per-plane encoder data: an aligned src_diff buffer of MAX_SB_SQUARE int16_t
// entries, pointers to coefficient / eob / entropy-context storage, the
// plane's source buffer descriptor, and pointers to the quantizer tables.
// NOTE(review): ownership of the pointed-to storage is not visible in this
// header.
33 typedef struct macroblock_plane {
34   DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]);
35   tran_low_t *qcoeff;
36   tran_low_t *coeff;
37   uint16_t *eobs;
38   uint8_t *txb_entropy_ctx;
39   struct buf_2d src;
40 
41   // Quantizer settings
42   // These are used/accessed only in the quantization process
43   // RDO does not / must not depend on any of these values
44   // All values below share the coefficient scale/shift used in TX
45   const int16_t *quant_fp_QTX;
46   const int16_t *round_fp_QTX;
47   const int16_t *quant_QTX;
48   const int16_t *quant_shift_QTX;
49   const int16_t *zbin_QTX;
50   const int16_t *round_QTX;
51   const int16_t *dequant_QTX;
52 } MACROBLOCK_PLANE;
53 
// Integer cost tables for coefficient coding symbols. Each table is indexed
// first by a context and then by a symbol value.
// NOTE(review): presumably these are rate costs derived from the matching
// CDFs -- the code that fills them is not in this header.
54 typedef struct {
55   int txb_skip_cost[TXB_SKIP_CONTEXTS][2];
56   int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3];
57   int base_cost[SIG_COEF_CONTEXTS][4];
58   int eob_extra_cost[EOB_COEF_CONTEXTS][2];
59   int dc_sign_cost[DC_SIGN_CONTEXTS][2];
60   int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1];
61 } LV_MAP_COEFF_COST;
62 
// End-of-block cost table holding 2 x 11 integer entries.
// NOTE(review): the meaning of the two indices is not visible in this header.
63 typedef struct {
64   int eob_cost[2][11];
65 } LV_MAP_EOB_COST;
66 
// Per-plane storage for transform coefficients, eobs and the per-transform-
// block contexts. The per-block arrays hold one entry for every
// TX_SIZE_W_MIN x TX_SIZE_H_MIN unit in an area of MAX_SB_SQUARE samples.
67 typedef struct {
68   tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE];
69   uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
70   uint8_t txb_skip_ctx[MAX_MB_PLANE]
71                       [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
72   int dc_sign_ctx[MAX_MB_PLANE]
73                  [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)];
74 } CB_COEFF_BUFFER;
75 
// Extra per-block mode information kept by the encoder: mode contexts,
// reference MV candidate stacks and their counts, global MVs, and per-plane
// pointers to coefficient data.
// NOTE(review): the per-plane pointers have the same names and element types
// as the arrays in CB_COEFF_BUFFER; presumably they point into one -- confirm.
76 typedef struct {
77   int16_t mode_context[MODE_CTX_REF_FRAMES];
78   // TODO(angiebird): Reduce the buffer size according to sb_type
79   tran_low_t *tcoeff[MAX_MB_PLANE];
80   uint16_t *eobs[MAX_MB_PLANE];
81   uint8_t *txb_skip_ctx[MAX_MB_PLANE];
82   int *dc_sign_ctx[MAX_MB_PLANE];
83   uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
84   CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
85   int_mv global_mvs[REF_FRAMES];
86   int16_t compound_mode_context[MODE_CTX_REF_FRAMES];
87 } MB_MODE_INFO_EXT;
88 
// Minimum and maximum limits for the column and row components of a motion
// vector (see the mv_limits member of struct macroblock).
// NOTE(review): units and whether the bounds are inclusive are not stated
// here.
89 typedef struct {
90   int col_min;
91   int col_max;
92   int row_min;
93   int row_max;
94 } MvLimits;
95 
// Buffers for palette handling: a color map of MAX_PALETTE_SQUARE bytes and
// an int buffer of twice that many entries for k-means data.
// NOTE(review): presumably scratch space for the palette mode search --
// confirm against the users of MACROBLOCK::palette_buffer.
96 typedef struct {
97   uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
98   int kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
99 } PALETTE_BUFFER;
100 
// One saved block-level RD search result, stored in MB_RD_RECORD: transform
// sizes, block skip flags, transform types, RD stats and a hash value.
// NOTE(review): hash_value is presumably the key used to find a matching
// entry -- confirm against the lookup code.
101 typedef struct {
102   TX_SIZE tx_size;
103   TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN];
104   uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
105   TX_TYPE txk_type[TXK_TYPE_BUF_LEN];
106   RD_STATS rd_stats;
107   uint32_t hash_value;
108 } MB_RD_INFO;
109 
// Fixed-capacity record of MB_RD_INFO entries plus the CRC calculator used
// for hashing. RD_RECORD_BUFFER_LEN is the capacity of the circular buffer.
// NOTE(review): index_start and num presumably give the position of the
// oldest entry and the number of valid entries -- confirm.
110 #define RD_RECORD_BUFFER_LEN 8
111 typedef struct {
112   MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN];  // Circular buffer.
113   int index_start;
114   int num;
115   CRC32C crc_calculator;  // Hash function.
116 } MB_RD_RECORD;
117 
// RD information saved for one transform block: distortion, sse, rate, eob,
// transform type and entropy contexts, plus a valid flag.
// NOTE(review): valid presumably marks whether the entry has been filled in
// -- confirm against the code that reads it.
118 typedef struct {
119   int64_t dist;
120   int64_t sse;
121   int rate;
122   uint16_t eob;
123   TX_TYPE tx_type;
124   uint16_t entropy_context;
125   uint8_t txb_entropy_ctx;
126   uint8_t valid;
127   uint8_t fast;  // This is not being used now.
128 } TXB_RD_INFO;
129 
// Record of transform block RD results: hash_vals and tx_rd_info are two
// arrays of the same length, TX_SIZE_RD_RECORD_BUFFER_LEN.
// NOTE(review): presumably hash_vals[i] is the key for tx_rd_info[i] and
// index_start / num manage the arrays as a circular buffer, as in
// MB_RD_RECORD -- confirm.
130 #define TX_SIZE_RD_RECORD_BUFFER_LEN 256
131 typedef struct {
132   uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN];
133   TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN];
134   int index_start;
135   int num;
136 } TXB_RD_RECORD;
137 
// Tree node with up to four children; each node points to an array of
// TXB_RD_INFO entries.
// NOTE(review): presumably the children correspond to the four sub-blocks of
// a transform split -- confirm against the code that builds the tree.
138 typedef struct tx_size_rd_info_node {
139   TXB_RD_INFO *rd_info_array;  // Points to array of size TX_TYPES.
140   struct tx_size_rd_info_node *children[4];
141 } TXB_RD_INFO_NODE;
142 
// Region size for mode decision sampling in the first pass of partition
// search (two_pass_partition_search speed feature), in units of mi size (4).
// Used by the mode_pruning_based_on_two_pass_partition_search speed feature.
#define FIRST_PARTITION_PASS_SAMPLE_REGION 8
// log2 of FIRST_PARTITION_PASS_SAMPLE_REGION.
#define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3
// Number of sample regions in a MAX_MIB_SIZE x MAX_MIB_SIZE area, i.e. the
// number of stats tables needed. The whole replacement list is parenthesized
// so the macro keeps its value when used inside a larger expression (for
// example as a divisor).
#define FIRST_PARTITION_PASS_STATS_TABLES                      \
  ((MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \
   (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2))
// Shift applied to a region row to form a table index: log2 of the number of
// sample regions per row.
#define FIRST_PARTITION_PASS_STATS_STRIDE \
  (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2)
153 
av1_first_partition_pass_stats_index(int mi_row,int mi_col)154 static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) {
155   const int row =
156       (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
157   const int col =
158       (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2;
159   return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col;
160 }
161 
// Reference frame usage counters and sample count for one stats table (see
// first_partition_pass_stats in struct macroblock).
// NOTE(review): the counters are uint8_t; whether writers saturate them is
// not visible in this header.
162 typedef struct {
163   uint8_t ref0_counts[REF_FRAMES];  // Counters for ref_frame[0].
164   uint8_t ref1_counts[REF_FRAMES];  // Counters for ref_frame[1].
165   int sample_counts;                // Number of samples collected.
166 } FIRST_PARTITION_PASS_STATS;
167 
// One saved interpolation filter entry: the filters together with two motion
// vectors, two reference frames and the compound type.
// MAX_INTERP_FILTER_STATS is the number of entries kept per comp_idx in
// struct macroblock.
// NOTE(review): presumably earlier results are matched on mv / ref_frames /
// comp_type so the filters can be reused -- confirm.
168 #define MAX_INTERP_FILTER_STATS 64
169 typedef struct {
170   InterpFilters filters;
171   int_mv mv[2];
172   int8_t ref_frames[2];
173   COMPOUND_TYPE comp_type;
174 } INTERPOLATION_FILTER_STATS;
175 
// Encoder working state (struct macroblock / MACROBLOCK): per-plane data,
// saved RD search records, the MACROBLOCKD state (e_mbd), RD multipliers,
// motion vector costs and limits, pointers to scratch buffers, and the
// symbol cost tables.
// NOTE(review): ownership of the pointed-to buffers, and which members are
// per-thread, cannot be determined from this header.
176 typedef struct macroblock MACROBLOCK;
177 struct macroblock {
178   struct macroblock_plane plane[MAX_MB_PLANE];
179 
180   // Determine if one would go with reduced complexity transform block
181   // search model to select prediction modes, or full complexity model
182   // to select transform kernel.
183   int rd_model;
184 
185   // Indicate if the encoder is running in the first pass partition search.
186   // In that case, apply certain speed features therein to reduce the overhead
187   // cost in the first pass search.
188   int cb_partition_scan;
189 
190   FIRST_PARTITION_PASS_STATS
191   first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES];
192 
193   // [comp_idx][saved stat_idx]
194   INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS];
195   int interp_filter_stats_idx[2];
196 
197   // Activate constrained coding block partition search range.
198   int use_cb_search_range;
199 
200   // Inter macroblock RD search info.
201   MB_RD_RECORD mb_rd_record;
202 
203   // Inter transform block RD search info. for square TX sizes.
204   TXB_RD_RECORD txb_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)];
205   TXB_RD_RECORD txb_rd_record_16X16[(MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)];
206   TXB_RD_RECORD txb_rd_record_32X32[(MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)];
207   TXB_RD_RECORD txb_rd_record_64X64[(MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)];
208 
209   // Intra transform block RD search info. for square TX sizes.
210   TXB_RD_RECORD txb_rd_record_intra;
211 
212   MACROBLOCKD e_mbd;
213   MB_MODE_INFO_EXT *mbmi_ext;
214   int skip_block;
215   int qindex;
216 
217   // The equivalent error at the current rdmult of one whole bit (not one
218   // bitcost unit).
219   int errorperbit;
220   // The equivalent SAD error of one (whole) bit at the current quantizer
221   // for large blocks.
222   int sadperbit16;
223   // The equivalent SAD error of one (whole) bit at the current quantizer
224   // for sub-8x8 blocks.
225   int sadperbit4;
226   int rdmult;
227   int mb_energy;
228   int sb_energy_level;
229   int *m_search_count_ptr;
230   int *ex_search_count_ptr;
231 
232   unsigned int txb_split_count;
233 
234   // These are set to their default values at the beginning, and then adjusted
235   // further in the encoding process.
236   BLOCK_SIZE min_partition_size;
237   BLOCK_SIZE max_partition_size;
238 
239   unsigned int max_mv_context[REF_FRAMES];
240   unsigned int source_variance;
241   unsigned int pred_sse[REF_FRAMES];
242   int pred_mv_sad[REF_FRAMES];
243 
244   int *nmvjointcost;
245   int nmv_vec_cost[MV_JOINTS];
246   int *nmvcost[2];
247   int *nmvcost_hp[2];
248   int **mv_cost_stack;
249   int **mvcost;
250 
251   int32_t *wsrc_buf;
252   int32_t *mask_buf;
253   uint8_t *above_pred_buf;
254   uint8_t *left_pred_buf;
255 
256   PALETTE_BUFFER *palette_buffer;
257 
258   CONV_BUF_TYPE *tmp_conv_dst;
259   uint8_t *tmp_obmc_bufs[2];
260 
261   // buffer for hash value calculation of a block
262   // used only in av1_get_block_hash_value()
263   // [first hash/second hash]
264   // [two buffers used ping-pong]
265   uint32_t *hash_value_buffer[2][2];
266 
267   CRC_CALCULATOR crc_calculator1;
268   CRC_CALCULATOR crc_calculator2;
269   int g_crc_initialized;
270 
271   // These define limits to motion vector components to prevent them
272   // from extending outside the UMV borders
273   MvLimits mv_limits;
274 
275   uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
276 
277   int skip;
278   int skip_chroma_rd;
279   int skip_cost[SKIP_CONTEXTS][2];
280 
281   int skip_mode;  // 0: off; 1: on
282   int skip_mode_cost[SKIP_CONTEXTS][2];
283 
284   int compound_idx;
285 
286   LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES];
287   LV_MAP_EOB_COST eob_costs[7][2];
288   uint16_t cb_offset;
289 
290   // mode costs
291   int intra_inter_cost[INTRA_INTER_CONTEXTS][2];
292 
293   int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES];
294   int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2];
295   int zeromv_mode_cost[GLOBALMV_MODE_CONTEXTS][2];
296   int refmv_mode_cost[REFMV_MODE_CONTEXTS][2];
297   int drl_mode_cost0[DRL_MODE_CONTEXTS][2];
298 
299   int comp_inter_cost[COMP_INTER_CONTEXTS][2];
300   int single_ref_cost[REF_CONTEXTS][SINGLE_REFS - 1][2];
301   int comp_ref_type_cost[COMP_REF_TYPE_CONTEXTS]
302                         [CDF_SIZE(COMP_REFERENCE_TYPES)];
303   int uni_comp_ref_cost[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1]
304                        [CDF_SIZE(2)];
305   // Cost for signaling ref_frame[0] (LAST_FRAME, LAST2_FRAME, LAST3_FRAME or
306   // GOLDEN_FRAME) in bidir-comp mode.
307   int comp_ref_cost[REF_CONTEXTS][FWD_REFS - 1][2];
308   // Cost for signaling ref_frame[1] (ALTREF_FRAME, ALTREF2_FRAME, or
309   // BWDREF_FRAME) in bidir-comp mode.
310   int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2];
311   int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES];
312   int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1];
313   int wedge_idx_cost[BLOCK_SIZES_ALL][16];
314   int interintra_cost[BLOCK_SIZE_GROUPS][2];
315   int wedge_interintra_cost[BLOCK_SIZES_ALL][2];
316   int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES];
317   int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES];
318   int motion_mode_cost1[BLOCK_SIZES_ALL][2];
319   int intra_uv_mode_cost[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES];
320   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
321   int filter_intra_cost[BLOCK_SIZES_ALL][2];
322   int filter_intra_mode_cost[FILTER_INTRA_MODES];
323   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
324   int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES];
325   int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
326   int palette_uv_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES];
327   int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
328                           [PALETTE_COLORS];
329   int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS]
330                            [PALETTE_COLORS];
331   int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2];
332   int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2];
333   // The rate associated with each alpha codeword
334   int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE];
335   int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
336   int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2];
337   int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
338   int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
339                          [TX_TYPES];
340   int angle_delta_cost[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1];
341   int switchable_restore_cost[RESTORE_SWITCHABLE_TYPES];
342   int wiener_restore_cost[2];
343   int sgrproj_restore_cost[2];
344   int intrabc_cost[2];
345 
346   // Used to store sub partition's choices.
347   MV pred_mv[REF_FRAMES];
348 
349   // Store the best motion vector during motion search
350   int_mv best_mv;
351   // Store the second best motion vector during full-pixel motion search
352   int_mv second_best_mv;
353 
354   // use default transform and skip transform type search for intra modes
355   int use_default_intra_tx_type;
356   // use default transform and skip transform type search for inter modes
357   int use_default_inter_tx_type;
358 #if CONFIG_DIST_8X8
359   int using_dist_8x8;
360   aom_tune_metric tune_metric;
361 #endif  // CONFIG_DIST_8X8
362   int comp_idx_cost[COMP_INDEX_CONTEXTS][2];
363   int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2];
364   // Bit flags for pruning tx type search, tx split, etc.
365   int tx_search_prune[EXT_TX_SET_TYPES];
366   int must_find_valid_partition;
367   int tx_split_prune_flag;  // Flag to skip tx split RD search.
368   int recalc_luma_mc_data;  // Flag to indicate recalculation of MC data during
369                             // interpolation filter search
370 };
371 
is_rect_tx_allowed_bsize(BLOCK_SIZE bsize)372 static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) {
373   static const char LUT[BLOCK_SIZES_ALL] = {
374     0,  // BLOCK_4X4
375     1,  // BLOCK_4X8
376     1,  // BLOCK_8X4
377     0,  // BLOCK_8X8
378     1,  // BLOCK_8X16
379     1,  // BLOCK_16X8
380     0,  // BLOCK_16X16
381     1,  // BLOCK_16X32
382     1,  // BLOCK_32X16
383     0,  // BLOCK_32X32
384     1,  // BLOCK_32X64
385     1,  // BLOCK_64X32
386     0,  // BLOCK_64X64
387     0,  // BLOCK_64X128
388     0,  // BLOCK_128X64
389     0,  // BLOCK_128X128
390     1,  // BLOCK_4X16
391     1,  // BLOCK_16X4
392     1,  // BLOCK_8X32
393     1,  // BLOCK_32X8
394     1,  // BLOCK_16X64
395     1,  // BLOCK_64X16
396   };
397 
398   return LUT[bsize];
399 }
400 
is_rect_tx_allowed(const MACROBLOCKD * xd,const MB_MODE_INFO * mbmi)401 static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd,
402                                      const MB_MODE_INFO *mbmi) {
403   return is_rect_tx_allowed_bsize(mbmi->sb_type) &&
404          !xd->lossless[mbmi->segment_id];
405 }
406 
tx_size_to_depth(TX_SIZE tx_size,BLOCK_SIZE bsize)407 static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize) {
408   TX_SIZE ctx_size = max_txsize_rect_lookup[bsize];
409   int depth = 0;
410   while (tx_size != ctx_size) {
411     depth++;
412     ctx_size = sub_tx_size_map[ctx_size];
413     assert(depth <= MAX_TX_DEPTH);
414   }
415   return depth;
416 }
417 
set_blk_skip(MACROBLOCK * x,int plane,int blk_idx,int skip)418 static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx,
419                                 int skip) {
420   if (skip)
421     x->blk_skip[blk_idx] |= 1UL << plane;
422   else
423     x->blk_skip[blk_idx] &= ~(1UL << plane);
424 #ifndef NDEBUG
425   // Set chroma planes to uninitialized states when luma is set to check if
426   // it will be set later
427   if (plane == 0) {
428     x->blk_skip[blk_idx] |= 1UL << (1 + 4);
429     x->blk_skip[blk_idx] |= 1UL << (2 + 4);
430   }
431 
432   // Clear the initialization checking bit
433   x->blk_skip[blk_idx] &= ~(1UL << (plane + 4));
434 #endif
435 }
436 
is_blk_skip(MACROBLOCK * x,int plane,int blk_idx)437 static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) {
438 #ifndef NDEBUG
439   // Check if this is initialized
440   assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4))));
441 
442   // The magic number is 0x77, this is to test if there is garbage data
443   assert((x->blk_skip[blk_idx] & 0x88) == 0);
444 #endif
445   return (x->blk_skip[blk_idx] >> plane) & 1;
446 }
447 
448 #ifdef __cplusplus
449 }  // extern "C"
450 #endif
451 
452 #endif  // AOM_AV1_ENCODER_BLOCK_H_
453