1 /*
2 * Copyright(c) 2019 Intel Corporation
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10 */
11 
12 #ifndef EbModeDecisionProcess_h
13 #define EbModeDecisionProcess_h
14 
15 #include "EbDefinitions.h"
16 #include "EbModeDecision.h"
17 #include "EbSyntaxElements.h"
18 #include "EbSystemResourceManager.h"
19 #include "EbPictureBufferDesc.h"
20 #include "EbEntropyCoding.h"
21 #include "EbTransQuantBuffers.h"
22 #include "EbReferenceObject.h"
23 #include "EbNeighborArrays.h"
24 #include "EbObject.h"
25 #include "EbEncInterPrediction.h"
26 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 /**************************************
31      * Defines
32      **************************************/
// Candidate-count limits: the total limit is the _Y limit plus 84 extra entries.
#define MODE_DECISION_CANDIDATE_MAX_COUNT_Y 1855
#define MODE_DECISION_CANDIDATE_MAX_COUNT   (MODE_DECISION_CANDIDATE_MAX_COUNT_Y + 84)

// Per-depth step values.
// NOTE(review): presumably block-index strides used when walking depths; confirm against the users.
#define DEPTH_ONE_STEP   21
#define DEPTH_TWO_STEP   5
#define DEPTH_THREE_STEP 1

// The "CANIDATES" spelling is part of the identifier; it is left unchanged because
// other files may reference it.
#define MAX_MVP_CANIDATES 4
39 /**************************************
40       * Macros
41       **************************************/
42 
// Position tests on a block origin (origin_x, origin_y).
// Each macro yields EB_TRUE when the same bit is set in BOTH coordinates, EB_FALSE otherwise:
//   GROUP_OF_4_8x8_BLOCKS   : bit 3 (value  8), i.e. (origin >> 3) is odd
//   GROUP_OF_4_16x16_BLOCKS : bit 4 (value 16)
//   GROUP_OF_4_32x32_BLOCKS : bit 5 (value 32)
// Every argument is parenthesized so that expressions containing lower-precedence
// operators (e.g. `a | b`, `c ? x : y`) are shifted as a whole.
// Each argument is evaluated exactly once.
#define GROUP_OF_4_8x8_BLOCKS(origin_x, origin_y) \
    (((((origin_x) >> 3) & 0x1) && (((origin_y) >> 3) & 0x1)) ? EB_TRUE : EB_FALSE)
#define GROUP_OF_4_16x16_BLOCKS(origin_x, origin_y) \
    ((((((origin_x) >> 3) & 0x2) == 0x2) && ((((origin_y) >> 3) & 0x2) == 0x2)) ? EB_TRUE : EB_FALSE)
#define GROUP_OF_4_32x32_BLOCKS(origin_x, origin_y) \
    ((((((origin_x) >> 3) & 0x4) == 0x4) && ((((origin_y) >> 3) & 0x4) == 0x4)) ? EB_TRUE : EB_FALSE)
49 
50 /**************************************
51        * Coding Loop Context
52        **************************************/
// Three 64-bit cost/distortion values kept together per entry.
// NOTE(review): field meanings are taken from the names only; confirm units with the writers.
typedef struct MdEncPassCuData {
    // cost recorded for the skip case
    uint64_t skip_cost;
    // cost recorded for the merge case
    uint64_t merge_cost;
    // distortion recorded for chroma
    uint64_t chroma_distortion;
} MdEncPassCuData;
58 
59 typedef struct {
60     uint8_t best_palette_color_map[MAX_PALETTE_SQUARE];
61     int     kmeans_data_buf[2 * MAX_PALETTE_SQUARE];
62 } PALETTE_BUFFER;
63 typedef struct MdBlkStruct {
64     unsigned             mdc_array_index : 7;
65     unsigned             count_non_zero_coeffs : 12;
66     unsigned             top_neighbor_depth : 8;
67     unsigned             left_neighbor_depth : 8;
68     unsigned             full_distortion : 32;
69     uint64_t             rec_dist_per_quadrant[4];
70     PartitionContextType left_neighbor_partition;
71     PartitionContextType above_neighbor_partition;
72     uint64_t             cost;
73     uint64_t
74                 default_cost; // Similar to cost but does not get updated @ d1_non_square_block_decision() and d2_inter_depth_block_decision()
75     CandidateMv ed_ref_mv_stack[MODE_CTX_REF_FRAMES]
76                                [MAX_REF_MV_STACK_SIZE]; //to be used in MD and EncDec
77     IntMv    ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; //used only for nonCompound modes.
78     uint32_t best_d1_blk;
79     uint8_t *neigh_left_recon[3]; //only for MD
80     uint8_t *neigh_top_recon[3];
81     uint16_t *neigh_left_recon_16bit[3];
82     uint16_t *neigh_top_recon_16bit[3];
83     int32_t quantized_dc[3][MAX_TXB_COUNT];
84 
85     uint8_t   skip_mode_allowed;
86     // wm
87     EbWarpedMotionParams wm_params_l0;
88     EbWarpedMotionParams wm_params_l1;
89     // compound
90     uint8_t                compound_idx;
91     InterInterCompoundData interinter_comp;
92     // txb
93     uint8_t u_has_coeff[TRANSFORM_UNIT_MAX_COUNT];
94     uint8_t v_has_coeff[TRANSFORM_UNIT_MAX_COUNT];
95     uint8_t y_has_coeff[TRANSFORM_UNIT_MAX_COUNT];
96 } MdBlkStruct;
97 
// Forward declarations: these struct tags are only named here, not defined,
// so pointers to them can be declared without their full definitions.
struct ModeDecisionCandidate;
struct ModeDecisionCandidateBuffer;
struct InterPredictionContext;
101 
102 typedef struct RefResults {
103     uint8_t  list_i; // list index of this ref
104     uint8_t  ref_i; // ref list index of this ref
105     uint32_t dist; // distortion
106     uint8_t  do_ref; // to process this ref  or not
107     EbBool   valid_ref;
108 } RefResults;
// Inter candidate groups. Values are consecutive starting at 0; TOT_INTER_GROUP
// is the number of groups and is used as an array dimension elsewhere in this header.
typedef enum InterCandGroup {
    // elementary-groups
    PA_ME_GROUP = 0,
    UNI_3x3_GROUP,
    BI_3x3_GROUP,
    NRST_NEW_NEAR_GROUP,
    NRST_NEAR_GROUP,
    PRED_ME_GROUP,
    GLOBAL_GROUP,

    // complex-groups
    WARP_GROUP,
    OBMC_GROUP,
    INTER_INTRA_GROUP,
    COMP_DIST,
    COMP_DIFF,
    COMP_WEDGE,

    // group count (must stay last)
    TOT_INTER_GROUP
} InterCandGroup;
127 typedef struct InterCompCtrls {
128     uint8_t allowed_comp_types
129         [MD_COMP_TYPES]; // Compound types to inject; AVG/DIST/DIFF/WEDGE (if a comp type is disallowed here, it will
130         // override distance-based settings)
131     uint8_t do_me;                                   // if true, test all compound types for me
132     uint8_t do_pme;                                  // if true, test all compound types for pme
133     uint8_t do_nearest_nearest;                      // if true, test all compound types for nearest_nearest
134     uint8_t do_near_near;                            // if true, test all compound types for near_near
135     uint8_t do_nearest_near_new;                     // if true, test all compound types for nearest_near_new
136     uint8_t do_3x3_bi;                               // if true, test all compound types for 3x3_bipred
137 
138     uint8_t pred0_to_pred1_mult;                     // multiplier to the pred0_to_pred1_sad; 0: no pred0_to_pred1_sad-based pruning, >= 1: towards more inter-inter compound
139     uint8_t use_rate;                                // if true, use rate @ compound params derivation
140 } InterCompCtrls;
// Inter-intra compound controls; currently a single on/off style field.
typedef struct InterIntraCompCtrls {
    // feature switch
    uint8_t enabled;
} InterIntraCompCtrls;
144 typedef struct ObmcControls {
145     uint8_t enabled;
146     EbBool  max_blk_size_16x16; // if true, cap the max block size that OBMC can be used to 16x16
147 } ObmcControls;
// Transform-type (txt) search controls: which txt group to use, selected by
// prediction type (inter/intra) and transform block size (< 16x16 or >= 16x16).
typedef struct TxtControls {
    // feature switch
    uint8_t enabled;

    // group to use when inter and tx block < 16x16
    uint8_t txt_group_inter_lt_16x16;
    // group to use when inter and tx block >= 16x16
    uint8_t txt_group_inter_gt_eq_16x16;

    // group to use when intra and tx block < 16x16
    uint8_t txt_group_intra_lt_16x16;
    // group to use when intra and tx block >= 16x16
    uint8_t txt_group_intra_gt_eq_16x16;
} TxtControls;
// Controls for bypassing transform-size search (TXS) based on thresholds.
typedef struct TxsCycleRControls {
    // On/Off feature control
    uint8_t enabled;
    // Threshold to bypass intra TXS <the higher th the higher speed>
    uint16_t intra_th;
    // Threshold to bypass inter TXS <the higher th the higher speed>
    uint16_t inter_th;
} TxsCycleRControls;
162 
// Caps on how many near / near_near candidates are considered.
typedef struct NearCountCtrls {
    // feature switch
    uint8_t enabled;

    // max # of near to consider
    uint8_t near_count;
    // max # of near_near to consider
    uint8_t near_near_count;
} NearCountCtrls;
169 
170 typedef struct RefPruningControls {
171     uint8_t enabled; // 0: OFF; 1: use inter to inter distortion deviation to derive best_refs
172     uint32_t max_dev_to_best[TOT_INTER_GROUP];     // 0: OFF; 1: limit the injection to the best references based on distortion
173     uint32_t ref_idx_2_offset;
174     uint32_t ref_idx_3_offset;
175     uint8_t closest_refs
176         [TOT_INTER_GROUP]; // 0: OFF; 1: limit the injection to the closest references based on distance (LAST/BWD)
177 } RefPruningControls;
// Depth refinement controls.
typedef struct DepthRefinementCtrls {
    // feature switch
    uint8_t enabled;

    // decrease towards a more aggressive level
    int64_t sub_to_current_th;
    // decrease towards a more aggressive level
    int64_t parent_to_current_th;
    // when 1, a maximum of 2 depth per block (PRED+Parent or PRED+Sub), 0: no restriction(s)
    uint8_t up_to_2_depth;
    // add an offset to sub_to_current_th and parent_to_current_th on the cost range of
    // the predicted block; use default ths for high cost(s) and more aggressive TH(s)
    // for low cost(s)
    uint8_t use_pred_block_cost;
} DepthRefinementCtrls;
// Depth removal controls; each disallow_* flag removes a block size and everything
// below it, based on the sb_64x64 (me_distortion, variance).
typedef struct DepthRemovalCtrls {
    // feature switch
    uint8_t enabled;
    // remove 32x32 blocks and below
    uint8_t disallow_below_64x64;
    // remove 16x16 blocks and below
    uint8_t disallow_below_32x32;
    // remove 8x8 blocks and below
    uint8_t disallow_below_16x16;
} DepthRemovalCtrls;
// Range of depths to consider around the current one.
typedef struct DepthCtrls {
    // start depth; 0: consider no parent blocks; else number of parent blocks to
    // consider, specified as a negative number (e.g. -2 means consider 2 parents)
    int8_t s_depth;
    // end depth; 0: consider no child blocks; else number of child blocks to
    // consider, specified as a positive number (e.g. 2 means consider 2 children)
    int8_t e_depth;
} DepthCtrls;
// Capacity of the per-band weight table below.
#define MAX_RANGE_CNT 8

// In-depth block skip controls.
typedef struct InDepthBlockSkipCtrls {
    // 0: in-depth-block-skip OFF; 1: in-depth-block-skip ON
    // higher towards more aggressive level(s)
    //   0  : the estimated cost for the next children is not taken into account and
    //        the action will be lossless compared to no in-depth-block-skip
    //   100: the normalized cost of next children is assumed to be equal to the
    //        normalized cost of past children
    uint16_t base_weight;

    // whether to amplify the base_weight based on the cost range of the parent block or not
    uint8_t cost_band_based_modulation;
    // the max cost beyond which the base_weight is zeroed out
    uint16_t max_cost_multiplier;
    // the number of band(s)
    uint8_t max_band_cnt;
    // the weight per band
    uint16_t weight_per_band[MAX_RANGE_CNT];

    // whether to modulate based on the child count
    uint8_t child_cnt_based_modulation;
    // to specify the weight per child cnt
    uint16_t cnt_based_weight[3];
} InDepthBlockSkipCtrls;
// Lower-depth block skip controls.
typedef struct LowerDepthBlockSkipCtrls {
    // feature switch
    uint8_t enabled;
    // the distortion-to-cost ratio under which the quad_deviation_th is zeroed out
    // (feature is disabled)
    float min_distortion_cost_ratio;
    // do not perform sub_depth if std_deviation of the 4 quadrants src-to-rec dist
    // is less than std_deviation_th
    // NOTE(review): "std_deviation_th" is the original wording and presumably means this field; confirm.
    float quad_deviation_th;
    // whether to skip all or only next depth;
    // 0: skip only next depth; 1: skip all lower depths
    uint8_t skip_all;
} LowerDepthBlockSkipCtrls;
219 typedef struct PfCtrls {
220     EB_TRANS_COEFF_SHAPE pf_shape;
221 } PfCtrls;
// NSQ motion search @ MD controls.
typedef struct MdNsqMotionSearchCtrls {
    // 0: NSQ motion search @ MD OFF; 1: NSQ motion search @ MD ON
    uint8_t enabled;
    // 0: search using SAD; 1: search using SSD
    uint8_t use_ssd;
    // Full Pel search area width
    uint8_t full_pel_search_width;
    // Full Pel search area height
    uint8_t full_pel_search_height;
} MdNsqMotionSearchCtrls;
// SQ motion search @ MD controls, with three sparse-search levels (lev0..lev2).
// Levels 0 and 1 also carry a max search area and a multiplier; level 2 does not.
typedef struct MdSqMotionSearchCtrls {
    // 0: SQ motion search @ MD OFF; 1: SQ motion search @ MD ON
    uint8_t enabled;
    // 0: search using SAD; 1: search using SSD
    uint8_t use_ssd;

    // TH for pa_me distortion to determine whether to search (distortion per pixel)
    uint16_t pame_distortion_th;

    // ---- sparse search, level 0 ----
    // 0: OFF; 1: ON
    uint8_t sprs_lev0_enabled;
    // Sparse search step
    uint8_t sprs_lev0_step;
    // Sparse search area width
    uint16_t sprs_lev0_w;
    // Sparse search area height
    uint16_t sprs_lev0_h;
    // Max Sparse search area width
    uint16_t max_sprs_lev0_w;
    // Max Sparse search area height
    uint16_t max_sprs_lev0_h;
    // search area multiplier (is a % -- 100 is no scaling)
    int16_t sprs_lev0_multiplier;

    // ---- sparse search, level 1 ----
    // 0: OFF; 1: ON
    uint8_t sprs_lev1_enabled;
    // Sparse search step
    uint8_t sprs_lev1_step;
    // Sparse search area width
    uint16_t sprs_lev1_w;
    // Sparse search area height
    uint16_t sprs_lev1_h;
    // Max Sparse search area width
    uint16_t max_sprs_lev1_w;
    // Max Sparse search area height
    uint16_t max_sprs_lev1_h;
    // search area multiplier (is a % -- 100 is no scaling)
    int16_t sprs_lev1_multiplier;

    // ---- sparse search, level 2 ----
    // 0: OFF; 1: ON
    uint8_t sprs_lev2_enabled;
    // Sparse search step
    uint8_t sprs_lev2_step;
    // Sparse search area width
    uint16_t sprs_lev2_w;
    // Sparse search area height
    uint16_t sprs_lev2_h;
} MdSqMotionSearchCtrls;
// PME search @ MD controls, including the early-exit thresholds that fall back to
// the ME MV before (pre_fp) and after (post_fp) the full-pel search.
typedef struct MdPmeCtrls {
    // 0: PME search @ MD OFF; 1: PME search @ MD ON
    uint8_t enabled;
    // 0: search using SAD; 1: search using SSD
    uint8_t use_ssd;
    // Full Pel search area width
    uint8_t full_pel_search_width;
    // Full Pel search area height
    uint8_t full_pel_search_height;
    // If pre_fp_pme_to_me_cost higher than pre_fp_pme_to_me_cost_th then
    // PME_MV = ME_MV and exit (decrease towards a faster level)
    int pre_fp_pme_to_me_cost_th;
    // If pre_fp_pme_to_me_mv smaller than pre_fp_pme_to_me_mv_th then
    // PME_MV = ME_MV and exit (increase towards a faster level)
    int pre_fp_pme_to_me_mv_th;
    // If post_fp_pme_to_me_cost higher than post_fp_pme_to_me_cost_th then
    // PME_MV = ME_MV and exit (decrease towards a faster level)
    int post_fp_pme_to_me_cost_th;
    // If post_fp_pme_to_me_mv smaller than post_fp_pme_to_me_mv_th then
    // PME_MV = ME_MV and exit (increase towards a faster level)
    int post_fp_pme_to_me_mv_th;
} MdPmeCtrls;
266 typedef struct MdSubPelSearchCtrls {
267     uint8_t enabled; // 0: subpel search @ MD OFF; 1: subpel search @ MD ON
268     SUBPEL_SEARCH_TYPE
269         subpel_search_type; // USE_8_TAPS | USE_4_TAPS | USE_2_TAPS | USE_2_TAPS_ORIG (not supported)
270     int subpel_iters_per_step; // Maximum number of steps in logarithmic subpel search before giving up.
271     uint8_t eight_pel_search_enabled; // 0: OFF; 1: ON
272     SUBPEL_SEARCH_METHODS subpel_search_method;   // Subpel_search_method can only be subpel_tree which does a subpixel
273                                                   // logarithmic search that keeps stepping at 1/2 pixel units until
274                                                   // you stop getting a gain, and then goes on to 1/4 and repeats
275                                                   // the same process. Along the way it skips many diagonals.
276 } MdSubPelSearchCtrls;
277 typedef struct ParentSqCoeffAreaBasedCyclesReductionCtrls {
278     EbBool enabled;
279 
280     uint8_t high_freq_band1_th;         // cutoff for the highest coeff-area band [0-100]
281     uint8_t high_freq_band1_level;      // level of action to use if luma coeff-area of parent SQ is >= high_freq_band1_th
282     uint8_t high_freq_band2_th;         // cutoff for the second high coeff-area band [0-100]; should be less than high_freq_band1_th
283     uint8_t high_freq_band2_level;      // level of action to use if luma coeff-area of parent SQ is >= high_freq_band2_th
284     uint8_t high_freq_band3_th;         // cutoff for the third high coeff-area band [0-100]; should be less than high_freq_band2_th
285     uint8_t high_freq_band3_level;      // level of action to use if luma coeff-area of parent SQ is >= high_freq_band3_th
286 
287     uint8_t enable_zero_coeff_action;   // enable for whether to apply action when parent SQ has 0 luma coefficients
288     uint8_t zero_coeff_action;          // level of action to use if parent SQ has 0 luma coeffs
289     uint8_t enable_one_coeff_action;    // enable for whether to apply action when parent SQ has 1 luma coefficients
290     uint8_t one_coeff_action;           // level of action to use if parent SQ has 1 luma coeff
291 
292     uint8_t low_freq_band1_th;          // cutoff for the lowest coeff-area band [0-100]; should be less than high_freq_band2_th
293     uint8_t low_freq_band1_level;       // level of action to use if luma coeff-area of parent SQ is < low_freq_band1_th
294     uint8_t low_freq_band2_th;          // cutoff for the lowest coeff-area band [0-100]; should be less than high_freq_band2_th and larger than low_freq_band1_th
295     uint8_t low_freq_band2_level;       // level of action to use if luma coeff-area of parent SQ is < low_freq_band2_th
296 }ParentSqCoeffAreaBasedCyclesReductionCtrls;
// RDOQ controls.
typedef struct RdoqCtrls {
    // feature switch
    uint8_t enabled;

    // 0: do not use eob_fast for luma inter; 1: use eob_fast for luma inter
    uint8_t eob_fast_l_inter;
    // 0: do not use eob_fast for luma intra; 1: use eob_fast for luma intra
    uint8_t eob_fast_l_intra;
    // 0: do not use eob_fast for chroma inter; 1: use eob_fast for chroma inter
    uint8_t eob_fast_c_inter;
    // 0: do not use eob_fast for chroma intra; 1: use eob_fast for chroma intra
    uint8_t eob_fast_c_intra;

    // 0: use default quant for luma; 1: use fp_quant for luma
    uint8_t fp_q_l;
    // 0: use default quant for chroma; 1: use fp_quant for chroma
    uint8_t fp_q_c;

    // do not perform rdoq if the tx satd > satd_factor
    uint8_t satd_factor;
    // do not perform rdoq based on an early skip/non-skip cost,
    // threshold for early exit is 5
    uint8_t early_exit_th;

    uint8_t disallow_md_rdoq_uv;
    uint8_t md_satd_factor;
} RdoqCtrls;
// NIC scaling numerators; each value <x> is read as the fraction <x>/16.
typedef struct NicCtrls {
    // Scaling numerator for post-stage 0 NICS: <x>/16
    uint8_t stage1_scaling_num;
    // Scaling numerator for post-stage 1 NICS: <x>/16
    uint8_t stage2_scaling_num;
    // Scaling numerator for post-stage 2 NICS: <x>/16
    uint8_t stage3_scaling_num;
} NicCtrls;
// NIC pruning controls: class-level thresholds/band counts and per-candidate base
// thresholds, one set for each of the post-mds0, post-mds1 and post-mds2 points.
typedef struct NicPruningCtrls {
    // ---- class pruning signal(s) ----
    // mdsx_class_th (for class removal): reduce the cand(s) of a class when the
    // deviation to the best_cand is higher than the threshold.
    //
    // All bands (except the last) are derived as follows:
    // For band_index=0 to band_index=(mdsx_band_cnt-2),
    //     band       = [band_index*band_width, (band_index+1)*band_width];
    //     band_width = mdsx_class_th/(band_cnt-1)
    //     multiplier = 1 / ((band_index+1)*2)
    // Last band is [mdsx_class_th, +inf) = kill (nic=0)
    //
    // e.g. mds1_class_th=20 and mds1_band_cnt=3
    //   band_index  | 0        | 1         | 2          |
    //   band        | 0 to 10  | 10 to 20  | 20 to +inf |
    //   action      | nic * 1  | nic * 1/2 | nic * 0    |
    // NOTE(review): the multiplier formula gives 1/2 for band 0 while the example
    // shows nic * 1; confirm which one matches the implementation.

    // Post mds0
    uint64_t mds1_class_th;
    uint8_t  mds1_band_cnt; // >=2

    // Post mds1
    uint64_t mds2_class_th;
    uint8_t  mds2_band_cnt; // >=2

    // Post mds2
    uint64_t mds3_class_th;
    uint8_t  mds3_band_cnt; // >=2

    // ---- cand pruning signal(s) ----
    // mdsx_cand_th (for single cand removal per class): remove cand if deviation to
    // the best_cand of the target class is higher than mdsx_cand_th, where
    //     mdsx_cand_th = base_th + sq_offset_th + intra_class_offset_th
    // Only the base_th part is stored in this struct.

    // Post mds0 (base_th)
    uint64_t mds1_cand_base_th;

    // Post mds1 (base_th)
    uint64_t mds2_cand_base_th;

    // Post mds2 (base_th)
    uint64_t mds3_cand_base_th;
} NicPruningCtrls;
// Candidate elimination controls.
// The "Ctlrs" spelling is part of the type name and is left unchanged.
// NOTE(review): per-field semantics are not documented in this header; confirm at the use sites.
typedef struct CandEliminationCtlrs {
    // feature switch (32-bit, unlike the 8-bit flags below)
    uint32_t enabled;
    uint8_t  dc_only;
    uint8_t  inject_new_me;
    uint8_t  inject_new_pme;
    uint8_t  inject_new_warp;
} CandEliminationCtlrs;
371 typedef struct ModeDecisionContext {
372     EbDctor  dctor;
373     EbFifo * mode_decision_configuration_input_fifo_ptr;
374     EbFifo * mode_decision_output_fifo_ptr;
375     int16_t *transform_inner_array_ptr;
376 
377     ModeDecisionCandidate **       fast_candidate_ptr_array;
378     ModeDecisionCandidate *        fast_candidate_array;
379     ModeDecisionCandidateBuffer ** candidate_buffer_ptr_array;
380     ModeDecisionCandidateBuffer *  candidate_buffer_tx_depth_1;
381     ModeDecisionCandidateBuffer *  candidate_buffer_tx_depth_2;
382     MdRateEstimationContext *      md_rate_estimation_ptr;
383     EbBool                         is_md_rate_estimation_ptr_owner;
384     struct MdRateEstimationContext rate_est_table;
385     InterPredictionContext *       inter_prediction_context;
386     MdBlkStruct *                  md_local_blk_unit;
387     BlkStruct *                    md_blk_arr_nsq;
388     uint8_t *                      avail_blk_flag;
389     uint8_t* tested_blk_flag; //tells whether this CU is tested in MD.
390     uint8_t* do_not_process_blk;
391     MdcSbData *                    mdc_sb_array;
392 
393     NeighborArrayUnit *intra_luma_mode_neighbor_array;
394     NeighborArrayUnit *skip_flag_neighbor_array;
395     NeighborArrayUnit *mode_type_neighbor_array;
396     NeighborArrayUnit *luma_recon_neighbor_array;
397     NeighborArrayUnit *cb_recon_neighbor_array;
398     NeighborArrayUnit *cr_recon_neighbor_array;
399     NeighborArrayUnit *tx_search_luma_recon_neighbor_array;
400     NeighborArrayUnit *luma_recon_neighbor_array16bit;
401     NeighborArrayUnit *cb_recon_neighbor_array16bit;
402     NeighborArrayUnit *cr_recon_neighbor_array16bit;
403     NeighborArrayUnit *tx_search_luma_recon_neighbor_array16bit;
404     NeighborArrayUnit *
405         luma_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits (COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1
406     NeighborArrayUnit *
407         full_loop_luma_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits (COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1
408     NeighborArrayUnit *
409         tx_search_luma_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits (COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1
410     NeighborArrayUnit *
411         cr_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits(COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1
412     NeighborArrayUnit *
413                          cb_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits(COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1
414     NeighborArrayUnit *  txfm_context_array;
415     NeighborArrayUnit *  ref_frame_type_neighbor_array;
416     NeighborArrayUnit *  leaf_partition_neighbor_array;
417     NeighborArrayUnit32 *interpolation_type_neighbor_array;
418 
419     // Transform and Quantization Buffers
420     EbTransQuantBuffers * trans_quant_buffers_ptr;
421     struct EncDecContext *enc_dec_context_ptr;
422 
423     uint64_t *fast_cost_array;
424     uint64_t *full_cost_array;
425     uint64_t *full_cost_skip_ptr;
426     uint64_t *full_cost_merge_ptr;
427     // Lambda
428     uint32_t fast_lambda_md[2];
429     uint32_t full_lambda_md[2];
430     uint32_t full_sb_lambda_md
431         [2]; // for the case of lambda modulation (blk_lambda_tuning), full_lambda_md/fast_lambda_md corresponds
432         // to block lambda and full_sb_lambda_md is the full lambda per sb
433     EbBool blk_lambda_tuning;
434     //  Context Variables---------------------------------
435     SuperBlock *     sb_ptr;
436     BlkStruct *      blk_ptr;
437     const BlockGeom *blk_geom;
438     PredictionUnit * pu_ptr;
439     MvUnit           mv_unit;
440     PALETTE_BUFFER   palette_buffer;
441     PaletteInfo      palette_cand_array[MAX_PAL_CAND];
442     // Entropy Coder
443     MdEncPassCuData *md_ep_pipe_sb;
444 
445     uint8_t         sb64_sq_no4xn_geom;   //simple geometry 64x64SB, Sq only, no 4xN
446     uint8_t          pu_itr;
447     uint32_t         *best_candidate_index_array;
448     uint16_t         blk_origin_x;
449     uint16_t         blk_origin_y;
450     uint32_t         sb_origin_x;
451     uint32_t         sb_origin_y;
452     uint32_t         round_origin_x;
453     uint32_t         round_origin_y;
454     uint16_t         pu_origin_x;
455     uint16_t         pu_origin_y;
456     uint16_t         pu_width;
457     uint16_t         pu_height;
458     EbPfMode         pf_md_mode;
459     uint8_t          hbd_mode_decision;
460     uint8_t          qp_index;
461     uint64_t         three_quad_energy;
462     uint32_t         txb_1d_offset;
463     EbBool           uv_intra_comp_only;
464     UvPredictionMode best_uv_mode[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
465     int32_t          best_uv_angle[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
466     uint64_t         best_uv_cost[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
467     uint64_t         fast_luma_rate[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
468     uint64_t         fast_chroma_rate[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1];
469     // Needed for DC prediction
470     int32_t is_inter_ctx;
471     uint8_t intra_luma_left_mode;
472     uint8_t intra_luma_top_mode;
473     EB_ALIGN(64)
474     int16_t pred_buf_q3
475         [CFL_BUF_SQUARE]; // Hsan: both MD and EP to use pred_buf_q3 (kept 1, and removed the 2nd)
476     uint8_t injected_ref_type_l0_array
477         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
478     uint8_t injected_ref_type_l1_array
479         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
480     uint8_t injected_ref_type_bipred_array
481         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
482     int16_t injected_mv_x_l0_array
483         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
484     int16_t injected_mv_y_l0_array
485         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
486     uint8_t injected_mv_count_l0;
487 
488     int16_t injected_mv_x_l1_array
489         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
490     int16_t injected_mv_y_l1_array
491         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
492     uint8_t injected_mv_count_l1;
493 
494     int16_t injected_mv_x_bipred_l0_array
495         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
496     int16_t injected_mv_y_bipred_l0_array
497         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
498     int16_t injected_mv_x_bipred_l1_array
499         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
500     int16_t injected_mv_y_bipred_l1_array
501         [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV
502     uint8_t              injected_mv_count_bipred;
503     uint32_t             fast_candidate_inter_count;
504     uint32_t             me_block_offset;
505     uint32_t             me_cand_offset;
506     EbPictureBufferDesc *cfl_temp_prediction_ptr;
507     EbPictureBufferDesc
508         *    residual_quant_coeff_ptr; // One buffer for residual and quantized coefficient
509     uint8_t  tx_depth;
510     uint8_t  txb_itr;
511     uint32_t me_sb_addr;
512     uint32_t geom_offset_x;
513     uint32_t geom_offset_y;
514     int16_t  luma_txb_skip_context;
515     int16_t  luma_dc_sign_context;
516     int16_t  cb_txb_skip_context;
517     int16_t  cb_dc_sign_context;
518     int16_t  cr_txb_skip_context;
519     int16_t  cr_dc_sign_context;
520     // Multi-modes signal(s)
521     uint8_t              parent_sq_type[MAX_PARENT_SQ];
522     uint8_t              parent_sq_pred_mode[MAX_PARENT_SQ];
523     uint8_t              chroma_level;
524     uint8_t              chroma_at_last_md_stage;
525     uint64_t             chroma_at_last_md_stage_intra_th;
526     uint64_t             chroma_at_last_md_stage_cfl_th;
527     uint8_t              global_mv_injection;
528     uint8_t              new_nearest_injection;
529     uint8_t              new_nearest_near_comb_injection;
530     uint8_t              warped_motion_injection;
531     uint8_t              unipred3x3_injection;
532     uint8_t              bipred3x3_injection;
533     uint8_t              redundant_blk;
534     uint8_t              nic_level;
535     uint8_t              similar_blk_avail;
536     uint16_t             similar_blk_mds;
537     uint8_t              inject_inter_candidates;
538     uint8_t *            cfl_temp_luma_recon;
539     uint16_t *           cfl_temp_luma_recon16bit;
540     EbBool               spatial_sse_full_loop_level;
541     EbBool               blk_skip_decision;
542     int8_t               rdoq_level;
543     int16_t              sb_me_mv[BLOCK_MAX_COUNT_SB_128][MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX][2];
544     MV                   fp_me_mv[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
545     MV                   sub_me_mv[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
546     uint32_t             post_subpel_me_mv_cost[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
547     int16_t              best_pme_mv[MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX][2];
548     int8_t               valid_pme_mv[MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX];
549     EbPictureBufferDesc *input_sample16bit_buffer;
550     uint16_t             tile_index;
551     DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]);
552     DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]);
553     DECLARE_ALIGNED(32, int16_t, residual1[MAX_SB_SQUARE]);
554     DECLARE_ALIGNED(32, int16_t, diff10[MAX_SB_SQUARE]);
555     unsigned int prediction_mse;
556     MdStage      md_stage;
557     uint32_t     *cand_buff_indices[CAND_CLASS_TOTAL];
558     uint8_t      md_staging_mode;
559     uint8_t      bypass_md_stage_1[CAND_CLASS_TOTAL];
560     uint8_t      bypass_md_stage_2[CAND_CLASS_TOTAL];
561     uint32_t     md_stage_0_count[CAND_CLASS_TOTAL];
562     uint32_t     md_stage_1_count[CAND_CLASS_TOTAL];
563     uint32_t     md_stage_2_count[CAND_CLASS_TOTAL];
564     uint32_t     md_stage_3_count[CAND_CLASS_TOTAL];
565     uint32_t     md_stage_1_total_count;
566     uint32_t     md_stage_2_total_count;
567     uint32_t     md_stage_3_total_count;
568     uint32_t     md_stage_3_total_intra_count;
569     uint64_t     best_intra_cost;
570     uint64_t     best_inter_cost;
571     CandClass    target_class;
572 
573     // fast_loop_core signals
574     EbBool md_staging_skip_interpolation_search;
575     EbBool md_staging_skip_chroma_pred;
576     // full_loop_core signals
577     EbBool
578            md_staging_perform_inter_pred; // 0: perform luma & chroma prediction + interpolation search, 2: nothing (use information from previous stages)
579     EbBool md_staging_tx_size_mode; // 0: Tx Size recon only, 1:Tx Size search and recon
580     EbBool md_staging_txt_level;
581     EbBool md_staging_skip_full_chroma;
582     EbBool md_staging_skip_rdoq;
583     EbBool md_staging_spatial_sse_full_loop_level;
584     EbBool md_staging_perform_intra_chroma_pred;
585     DECLARE_ALIGNED(
586         16, uint8_t,
587         intrapred_buf[INTERINTRA_MODES][2 * 32 * 32]); //MAX block size for inter intra is 32x32
588     uint64_t *ref_best_cost_sq_table;
589     uint32_t *ref_best_ref_sq_table;
590     DECLARE_ALIGNED(16, uint8_t, obmc_buff_0[2 * 2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
591     DECLARE_ALIGNED(16, uint8_t, obmc_buff_1[2 * 2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
592     DECLARE_ALIGNED(16, uint8_t, obmc_buff_0_8b[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
593     DECLARE_ALIGNED(16, uint8_t, obmc_buff_1_8b[2 * MAX_MB_PLANE * MAX_SB_SQUARE]);
594     DECLARE_ALIGNED(16, int32_t, wsrc_buf[MAX_SB_SQUARE]);
595     DECLARE_ALIGNED(16, int32_t, mask_buf[MAX_SB_SQUARE]);
596     unsigned int pred_sse[REF_FRAMES];
597     uint8_t *    above_txfm_context;
598     uint8_t *    left_txfm_context;
599     // square cost weighting for deciding if a/b shapes could be skipped
600     uint32_t sq_weight;
601     uint32_t max_part0_to_part1_dev;
602     // signal for enabling shortcut to skip search depths
603     uint8_t dc_cand_only_flag;
604     EbBool  disable_angle_z2_intra_flag;
605     uint8_t shut_skip_ctx_dc_sign_update;
606     uint8_t shut_fast_rate; // use coeff rate and slipt flag rate only (no MVP derivation)
607     uint8_t fast_coeff_est_level; // estimate the rate of the first (eob/N) coeff(s) and last coeff only
608     uint8_t              interpolation_search_level;
609     uint8_t              md_tx_size_search_mode;
610     uint8_t              md_pic_obmc_level;
611     uint8_t              md_enable_paeth;
612     uint8_t              md_enable_smooth;
613     uint8_t              md_inter_intra_level;
614     uint8_t              md_filter_intra_level;
615     uint8_t              md_intra_angle_delta;
616     uint8_t              md_allow_intrabc;
617     uint8_t              md_palette_level;
618     uint8_t              dist_based_ref_pruning;
619     DepthRemovalCtrls    depth_removal_ctrls;
620     InDepthBlockSkipCtrls in_depth_block_skip_ctrls;
621     DepthCtrls           depth_ctrls; // control which depths can be considered in PD1
622     LowerDepthBlockSkipCtrls lower_depth_block_skip_ctrls;
623     DepthRefinementCtrls depth_refinement_ctrls;
624     int64_t parent_to_current_deviation;
625     int64_t child_to_current_deviation;
626     uint8_t              pf_level;
627     PfCtrls              pf_ctrls;
628     uint8_t              md_exit_th;
629     // Control signals for MD sparse search (used for increasing ME search for active clips)
630     uint8_t                md_sq_mv_search_level;
631     MdSqMotionSearchCtrls  md_sq_me_ctrls;
632     uint8_t                md_nsq_mv_search_level;
633     MdNsqMotionSearchCtrls md_nsq_motion_search_ctrls;
634     uint8_t                md_pme_level;
635     MdPmeCtrls             md_pme_ctrls;
636     uint8_t                md_subpel_me_level;
637     MdSubPelSearchCtrls    md_subpel_me_ctrls;
638     uint8_t                md_subpel_pme_level;
639     MdSubPelSearchCtrls    md_subpel_pme_ctrls;
640     RefResults             pme_res[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
641     ObmcControls           obmc_ctrls;
642     InterCompCtrls         inter_comp_ctrls;
643     InterIntraCompCtrls    inter_intra_comp_ctrls;
644     RefResults ref_filtering_res[TOT_INTER_GROUP][MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
645     RefPruningControls ref_pruning_ctrls;
646     // Signal to control initial and final pass PD setting(s)
647     PdPass pd_pass;
648 
649     EbBool            md_disable_cfl;
650     TxtControls       txt_ctrls;
651     NearCountCtrls near_count_ctrls;
652     RdoqCtrls         rdoq_ctrls;
653     uint8_t           disallow_4x4;
654     uint8_t           md_disallow_nsq;
655     uint64_t          best_nsq_default_cost;
656     uint64_t          default_cost_per_shape[NUMBER_OF_SHAPES];
657     ParentSqCoeffAreaBasedCyclesReductionCtrls parent_sq_coeff_area_based_cycles_reduction_ctrls;
658     uint8_t           sb_size;
659 
660     EbPictureBufferDesc *recon_coeff_ptr[TX_TYPES];
661     EbPictureBufferDesc *recon_ptr[TX_TYPES];
662     uint8_t              skip_intra;
663     EbPictureBufferDesc *temp_residual_ptr;
664     EbPictureBufferDesc *temp_recon_ptr;
665     // Array for all nearest/near MVs for a block for single ref case
666     MV mvp_array[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH][MAX_MVP_CANIDATES];
667     // Count of all nearest/near MVs for a block for single ref case
668     int8_t mvp_count[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
669     // Start/end position for MD sparse search
670     int16_t sprs_lev0_start_x;
671     int16_t sprs_lev0_end_x;
672     int16_t sprs_lev0_start_y;
673     int16_t sprs_lev0_end_y;
674 
675     uint8_t         md_staging_tx_size_level;
676     NicCtrls        nic_ctrls;
677     NicPruningCtrls nic_pruning_ctrls;
678     uint8_t         inter_compound_mode;
679     MV              ref_mv;
680     uint8_t         ifs_is_regular_last; // If regular is last performed interp_filters @ IFS
681     uint8_t         use_prev_mds_res;
682     uint16_t        sb_index;
683     uint8_t         early_cand_elimination;
684     uint64_t        mds0_best_cost;
685     uint8_t         mds0_best_class;
686     uint8_t reduce_last_md_stage_candidate;
687     uint32_t mds0_best_idx;
688     CandClass mds0_best_class_it;
689     uint32_t mds1_best_idx;
690     CandClass mds1_best_class_it;
691     uint8_t use_var_in_mds0;
692     uint32_t md_me_cost[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH];
693     uint32_t md_me_dist;
694     uint8_t inject_new_me;
695     uint8_t inject_new_pme;
696     uint8_t inject_new_warp;
697     uint8_t merge_inter_classes;
698     uint8_t bypass_tx_search_when_zcoef;
699     uint64_t estimate_ref_frames_num_bits[MODE_CTX_REF_FRAMES][2]; // [TOTAL_REFS_PER_FRAME + 1][is_compound]
700     CandEliminationCtlrs cand_elimination_ctrs;
701     uint32_t early_txt_search_exit_level; // should be moved to txt_ctrls
702     uint8_t ep_use_md_skip_decision;
703     uint32_t max_nics ; // Maximum number of candidates MD can support
704     uint32_t max_nics_uv ; // Maximum number of candidates MD can support
705     uint8_t use_best_mds0;
706 } ModeDecisionContext;
707 
// Function-pointer type for a lambda assignment routine. It is the element
// type of av1_lambda_assignment_function_table declared further down in this
// header. The routine returns nothing.
// NOTE(review): fast_lambda and full_lambda are non-const pointers, so they are
// presumably outputs written by the callee - confirm against the implementations.
// NOTE(review): the valid range of qp_index and the exact effect of
// multiply_lambda are not visible from this header.
708 typedef void (*EbAv1LambdaAssignFunc)(PictureControlSet *pcs_ptr, uint32_t *fast_lambda,
709                                       uint32_t *full_lambda, uint8_t bit_depth, uint16_t qp_index,
710                                       EbBool multiply_lambda);
711 
712 /**************************************
713      * Extern Function Declarations
714      **************************************/
// Declaration of the ModeDecisionContext constructor; the definition lives
// outside this header. The result is reported as an EbErrorType.
// NOTE(review): the two EbFifo pointers are presumably the input/output queues
// the mode decision stage is attached to - confirm in the definition.
// NOTE(review): how color_format, sb_size, enc_mode, enable_hbd_mode_decision
// and cfg_palette influence the constructed context cannot be told from here.
715 extern EbErrorType mode_decision_context_ctor(ModeDecisionContext *context_ptr,
716                                               EbColorFormat color_format, uint8_t sb_size,
717                                               uint8_t enc_mode,
718                                               EbFifo *mode_decision_configuration_input_fifo_ptr,
719                                               EbFifo *mode_decision_output_fifo_ptr,
720                                               uint8_t enable_hbd_mode_decision,
721                                               uint8_t cfg_palette);
722 
// Read-only table of four EbAv1LambdaAssignFunc pointers, defined in another
// translation unit.
// NOTE(review): what selects among the four entries is not visible here.
723 extern const EbAv1LambdaAssignFunc av1_lambda_assignment_function_table[4];
724 
725 // Table that converts 0-63 Q-range values passed in from outside to the Qindex
726 // range used internally.
// The initializer holds 64 entries: indices 0..61 map to 4 * index (0..244),
// and the last two entries are 249 and 255.
// The array is `static const` in a header, so each translation unit that
// includes this file gets its own private copy.
727 static const uint8_t quantizer_to_qindex[] = {
728     0,   4,   8,   12,  16,  20,  24,  28,  32,  36,  40,  44,  48,  52,  56,  60,
729     64,  68,  72,  76,  80,  84,  88,  92,  96,  100, 104, 108, 112, 116, 120, 124,
730     128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188,
731     192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255};
732 
// Declaration only; the definition lives outside this header. Takes the
// sequence control set, the mode decision context, the picture control set,
// a tile row index and a segment index, and returns nothing.
// NOTE(review): presumably re-initialises context_ptr for a new
// picture/segment - confirm which fields are reset in the definition.
733 extern void reset_mode_decision(SequenceControlSet *scs_ptr, ModeDecisionContext *context_ptr,
734                                 PictureControlSet *pcs_ptr, uint16_t tile_row_idx,
735                                 uint32_t segment_index);
736 
// Declaration only; the definition lives outside this header. Takes the mode
// decision context, the picture control set and an 8-bit sb_qp value, and
// returns nothing.
// NOTE(review): presumably applies per-superblock settings derived from sb_qp
// to context_ptr - confirm in the definition.
737 extern void mode_decision_configure_sb(ModeDecisionContext *context_ptr, PictureControlSet *pcs_ptr,
738                                        uint8_t sb_qp);
// Declaration only; the definition lives outside this header. Operates on one
// candidate buffer within a superblock, given the input picture and two 32-bit
// indices (input_cb_origin_in_index, blk_chroma_origin_index). Returns nothing.
// NOTE(review): judging by the name, this presumably selects the CfL alpha
// values for the candidate by rate-distortion cost - confirm in the definition,
// including which structure receives the result.
739 extern void md_cfl_rd_pick_alpha(PictureControlSet *          pcs_ptr,
740                                  ModeDecisionCandidateBuffer *candidate_buffer, SuperBlock *sb_ptr,
741                                  ModeDecisionContext *context_ptr,
742                                  EbPictureBufferDesc *input_picture_ptr,
743                                  uint32_t             input_cb_origin_in_index,
744                                  uint32_t             blk_chroma_origin_index);
745 
746 #ifdef __cplusplus
747 }
748 #endif
749 #endif // EbModeDecisionProcess_h
750