1 /* 2 * Copyright(c) 2019 Intel Corporation 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license. 10 */ 11 12 #ifndef EbModeDecisionProcess_h 13 #define EbModeDecisionProcess_h 14 15 #include "EbDefinitions.h" 16 #include "EbModeDecision.h" 17 #include "EbSyntaxElements.h" 18 #include "EbSystemResourceManager.h" 19 #include "EbPictureBufferDesc.h" 20 #include "EbEntropyCoding.h" 21 #include "EbTransQuantBuffers.h" 22 #include "EbReferenceObject.h" 23 #include "EbNeighborArrays.h" 24 #include "EbObject.h" 25 #include "EbEncInterPrediction.h" 26 27 #ifdef __cplusplus 28 extern "C" { 29 #endif 30 /************************************** 31 * Defines 32 **************************************/ 33 #define MODE_DECISION_CANDIDATE_MAX_COUNT_Y 1855 34 #define MODE_DECISION_CANDIDATE_MAX_COUNT (MODE_DECISION_CANDIDATE_MAX_COUNT_Y + 84) 35 #define DEPTH_ONE_STEP 21 36 #define DEPTH_TWO_STEP 5 37 #define DEPTH_THREE_STEP 1 38 #define MAX_MVP_CANIDATES 4 39 /************************************** 40 * Macros 41 **************************************/ 42 43 #define GROUP_OF_4_8x8_BLOCKS(origin_x, origin_y) \ 44 (((origin_x >> 3) & 0x1) && ((origin_y >> 3) & 0x1) ? EB_TRUE : EB_FALSE) 45 #define GROUP_OF_4_16x16_BLOCKS(origin_x, origin_y) \ 46 (((((origin_x >> 3) & 0x2) == 0x2) && (((origin_y >> 3) & 0x2) == 0x2)) ? EB_TRUE : EB_FALSE) 47 #define GROUP_OF_4_32x32_BLOCKS(origin_x, origin_y) \ 48 (((((origin_x >> 3) & 0x4) == 0x4) && (((origin_y >> 3) & 0x4) == 0x4)) ? EB_TRUE : EB_FALSE) 49 50 /************************************** 51 * Coding Loop Context 52 **************************************/ 53 typedef struct MdEncPassCuData { 54 uint64_t skip_cost; 55 uint64_t merge_cost; 56 uint64_t chroma_distortion; 57 } MdEncPassCuData; 58 59 typedef struct { 60 uint8_t best_palette_color_map[MAX_PALETTE_SQUARE]; 61 int kmeans_data_buf[2 * MAX_PALETTE_SQUARE]; 62 } PALETTE_BUFFER; 63 typedef struct MdBlkStruct { 64 unsigned mdc_array_index : 7; 65 unsigned count_non_zero_coeffs : 12; 66 unsigned top_neighbor_depth : 8; 67 unsigned left_neighbor_depth : 8; 68 unsigned full_distortion : 32; 69 uint64_t rec_dist_per_quadrant[4]; 70 PartitionContextType left_neighbor_partition; 71 PartitionContextType above_neighbor_partition; 72 uint64_t cost; 73 uint64_t 74 default_cost; // Similar to cost but does not get updated @ d1_non_square_block_decision() and d2_inter_depth_block_decision() 75 CandidateMv ed_ref_mv_stack[MODE_CTX_REF_FRAMES] 76 [MAX_REF_MV_STACK_SIZE]; //to be used in MD and EncDec 77 IntMv ref_mvs[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; //used only for nonCompound modes. 78 uint32_t best_d1_blk; 79 uint8_t *neigh_left_recon[3]; //only for MD 80 uint8_t *neigh_top_recon[3]; 81 uint16_t *neigh_left_recon_16bit[3]; 82 uint16_t *neigh_top_recon_16bit[3]; 83 int32_t quantized_dc[3][MAX_TXB_COUNT]; 84 85 uint8_t skip_mode_allowed; 86 // wm 87 EbWarpedMotionParams wm_params_l0; 88 EbWarpedMotionParams wm_params_l1; 89 // compound 90 uint8_t compound_idx; 91 InterInterCompoundData interinter_comp; 92 // txb 93 uint8_t u_has_coeff[TRANSFORM_UNIT_MAX_COUNT]; 94 uint8_t v_has_coeff[TRANSFORM_UNIT_MAX_COUNT]; 95 uint8_t y_has_coeff[TRANSFORM_UNIT_MAX_COUNT]; 96 } MdBlkStruct; 97 98 struct ModeDecisionCandidate; 99 struct ModeDecisionCandidateBuffer; 100 struct InterPredictionContext; 101 102 typedef struct RefResults { 103 uint8_t list_i; // list index of this ref 104 uint8_t ref_i; // ref list index of this ref 105 uint32_t dist; // distortion 106 uint8_t do_ref; // to process this ref or not 107 EbBool valid_ref; 108 } RefResults; 109 typedef enum InterCandGroup { 110 // elementary-groups 111 PA_ME_GROUP, 112 UNI_3x3_GROUP, 113 BI_3x3_GROUP, 114 NRST_NEW_NEAR_GROUP, 115 NRST_NEAR_GROUP, 116 PRED_ME_GROUP, 117 GLOBAL_GROUP, 118 // complex-groups 119 WARP_GROUP, 120 OBMC_GROUP, 121 INTER_INTRA_GROUP, 122 COMP_DIST, 123 COMP_DIFF, 124 COMP_WEDGE, 125 TOT_INTER_GROUP 126 } InterCandGroup; 127 typedef struct InterCompCtrls { 128 uint8_t allowed_comp_types 129 [MD_COMP_TYPES]; // Compound types to inject; AVG/DIST/DIFF/WEDGE (if a comp type is disallowed here, it will 130 // override distance-based settings) 131 uint8_t do_me; // if true, test all compound types for me 132 uint8_t do_pme; // if true, test all compound types for pme 133 uint8_t do_nearest_nearest; // if true, test all compound types for nearest_nearest 134 uint8_t do_near_near; // if true, test all compound types for near_near 135 uint8_t do_nearest_near_new; // if true, test all compound types for nearest_near_new 136 uint8_t do_3x3_bi; // if true, test all compound types for 3x3_bipred 137 138 uint8_t pred0_to_pred1_mult; // multiplier to the pred0_to_pred1_sad; 0: no pred0_to_pred1_sad-based pruning, >= 1: towards more inter-inter compound 139 uint8_t use_rate; // if true, use rate @ compound params derivation 140 } InterCompCtrls; 141 typedef struct InterIntraCompCtrls { 142 uint8_t enabled; 143 } InterIntraCompCtrls; 144 typedef struct ObmcControls { 145 uint8_t enabled; 146 EbBool max_blk_size_16x16; // if true, cap the max block size that OBMC can be used to 16x16 147 } ObmcControls; 148 typedef struct TxtControls { 149 uint8_t enabled; 150 151 uint8_t txt_group_inter_lt_16x16; // group to use when inter and tx block < 16x16 152 uint8_t txt_group_inter_gt_eq_16x16; // group to use when inter and tx block >= 16x16 153 154 uint8_t txt_group_intra_lt_16x16; // group to use when intra and tx block < 16x16 155 uint8_t txt_group_intra_gt_eq_16x16; // group to use when intra and tx block >= 16x16 156 } TxtControls; 157 typedef struct TxsCycleRControls { 158 uint8_t enabled; // On/Off feature control 159 uint16_t intra_th; // Threshold to bypass intra TXS <the higher th the higher speed> 160 uint16_t inter_th; // Threshold to bypass inter TXS <the higher th the higher speed> 161 } TxsCycleRControls; 162 163 typedef struct NearCountCtrls { 164 uint8_t enabled; 165 166 uint8_t near_count; // max # of near to consider 167 uint8_t near_near_count; // max # of near_near to consider 168 }NearCountCtrls; 169 170 typedef struct RefPruningControls { 171 uint8_t enabled; // 0: OFF; 1: use inter to inter distortion deviation to derive best_refs 172 uint32_t max_dev_to_best[TOT_INTER_GROUP]; // 0: OFF; 1: limit the injection to the best references based on distortion 173 uint32_t ref_idx_2_offset; 174 uint32_t ref_idx_3_offset; 175 uint8_t closest_refs 176 [TOT_INTER_GROUP]; // 0: OFF; 1: limit the injection to the closest references based on distance (LAST/BWD) 177 } RefPruningControls; 178 typedef struct DepthRefinementCtrls { 179 uint8_t enabled; 180 181 int64_t sub_to_current_th; // decrease towards a more agressive level 182 int64_t parent_to_current_th; // decrease towards a more agressive level 183 uint8_t up_to_2_depth; // when 1, a maximum of 2 depth per block (PRED+Parent or PRED+Sub), 0: no restriction(s) 184 uint8_t 185 use_pred_block_cost; // add an offset to sub_to_current_th and parent_to_current_th on the cost range of the predicted block; use default ths for high cost(s) and more aggressive TH(s) for low cost(s) 186 } DepthRefinementCtrls; 187 typedef struct DepthRemovalCtrls { 188 uint8_t enabled; 189 uint8_t disallow_below_64x64; // remove 32x32 blocks and below based on the sb_64x64 (me_distortion, variance) 190 uint8_t disallow_below_32x32; // remove 16x16 blocks and below based on the sb_64x64 (me_distortion, variance) 191 uint8_t disallow_below_16x16; // remove 8x8 blocks and below based on the sb_64x64 (me_distortion, variance) 192 }DepthRemovalCtrls; 193 typedef struct DepthCtrls { 194 int8_t s_depth; // start depth; 0: consider no parent blocks; else number of parent blocks to consider, specified as a negative number (e.g. -2 means consider 2 parents) 195 int8_t e_depth; // end depth; 0: consider no child blocks; else number of child blocks to consider, specified as a positive number (e.g. 2 means consider 2 children) 196 }DepthCtrls; 197 #define MAX_RANGE_CNT 8 198 typedef struct InDepthBlockSkipCtrls { 199 uint16_t base_weight; // 0: in-depth-block-skip OFF; 1: in-depth-block-skip ON 200 // higher towards more aggressive level(s) 201 // 0: the estimated cost for the next children is not taken into account and the action will be lossless compared to no in - depth - block - skip 202 // 100 : the normalized cost of next children is assumed to be equal to the normalized cost of past children 203 204 uint8_t cost_band_based_modulation; // whether to amplify the base_weight based on the cost range of the parent block or not 205 uint16_t max_cost_multiplier; // the max cost beyond which the base_weight is zeroed out 206 uint8_t max_band_cnt; // the number of band(s) 207 uint16_t weight_per_band[MAX_RANGE_CNT]; // the weight per band 208 209 uint8_t child_cnt_based_modulation; // whether to modulate based on the child count 210 uint16_t cnt_based_weight[3]; // to specify the weight per child cnt 211 212 } InDepthBlockSkipCtrls; 213 typedef struct LowerDepthBlockSkipCtrls { 214 uint8_t enabled; 215 float min_distortion_cost_ratio; // the distortion-to-cost ratio under wich the quad_deviation_th is zeroed out (feature is disabled) 216 float quad_deviation_th; // do not perform sub_depth if std_deviation of the 4 quadrants src-to-rec dist is less than std_deviation_th 217 uint8_t skip_all; // whether to skip all or only next depth; 0: skip only next depth; 1: skip all lower depths 218 }LowerDepthBlockSkipCtrls; 219 typedef struct PfCtrls { 220 EB_TRANS_COEFF_SHAPE pf_shape; 221 } PfCtrls; 222 typedef struct MdNsqMotionSearchCtrls { 223 uint8_t enabled; // 0: NSQ motion search @ MD OFF; 1: NSQ motion search @ MD ON 224 uint8_t use_ssd; // 0: search using SAD; 1: search using SSD 225 uint8_t full_pel_search_width; // Full Pel search area width 226 uint8_t full_pel_search_height; // Full Pel search area height 227 } MdNsqMotionSearchCtrls; 228 typedef struct MdSqMotionSearchCtrls { 229 uint8_t enabled; // 0: SQ motion search @ MD OFF; 1: SQ motion search @ MD ON 230 uint8_t use_ssd; // 0: search using SAD; 1: search using SSD 231 232 uint16_t 233 pame_distortion_th; // TH for pa_me distortion to determine whether to search (distortion per pixel) 234 235 uint8_t sprs_lev0_enabled; // 0: OFF; 1: ON 236 uint8_t sprs_lev0_step; // Sparse search step 237 uint16_t sprs_lev0_w; // Sparse search area width 238 uint16_t sprs_lev0_h; // Sparse search area height 239 uint16_t max_sprs_lev0_w; // Max Sparse search area width 240 uint16_t max_sprs_lev0_h; // Max Sparse search area height 241 int16_t sprs_lev0_multiplier; // search area multiplier (is a % -- 100 is no scaling) 242 243 uint8_t sprs_lev1_enabled; // 0: OFF; 1: ON 244 uint8_t sprs_lev1_step; // Sparse search step 245 uint16_t sprs_lev1_w; // Sparse search area width 246 uint16_t sprs_lev1_h; // Sparse search area height 247 uint16_t max_sprs_lev1_w; // Max Sparse search area width 248 uint16_t max_sprs_lev1_h; // Max Sparse search area height 249 int16_t sprs_lev1_multiplier; // search area multiplier (is a % -- 100 is no scaling) 250 251 uint8_t sprs_lev2_enabled; // 0: OFF; 1: ON 252 uint8_t sprs_lev2_step; // Sparse search step 253 uint16_t sprs_lev2_w; // Sparse search area width 254 uint16_t sprs_lev2_h; // Sparse search area height 255 } MdSqMotionSearchCtrls; 256 typedef struct MdPmeCtrls { 257 uint8_t enabled; // 0: PME search @ MD OFF; 1: PME search @ MD ON 258 uint8_t use_ssd; // 0: search using SAD; 1: search using SSD 259 uint8_t full_pel_search_width; // Full Pel search area width 260 uint8_t full_pel_search_height; // Full Pel search area height 261 int pre_fp_pme_to_me_cost_th; // If pre_fp_pme_to_me_cost higher than pre_fp_pme_to_me_cost_th then PME_MV = ME_MV and exit (decrease towards a faster level) 262 int pre_fp_pme_to_me_mv_th; // If pre_fp_pme_to_me_mv smaller than pre_fp_pme_to_me_mv_th then PME_MV = ME_MV and exit (increase towards a faster level) 263 int post_fp_pme_to_me_cost_th; // If post_fp_pme_to_me_cost higher than post_fp_pme_to_me_cost_th then PME_MV = ME_MV and exit (decrease towards a faster level) 264 int post_fp_pme_to_me_mv_th; // If post_fp_pme_to_me_mv smaller than post_fp_pme_to_me_mv_th then PME_MV = ME_MV and exit (increase towards a faster level) 265 } MdPmeCtrls; 266 typedef struct MdSubPelSearchCtrls { 267 uint8_t enabled; // 0: subpel search @ MD OFF; 1: subpel search @ MD ON 268 SUBPEL_SEARCH_TYPE 269 subpel_search_type; // USE_8_TAPS | USE_4_TAPS | USE_2_TAPS | USE_2_TAPS_ORIG (not supported) 270 int subpel_iters_per_step; // Maximum number of steps in logarithmic subpel search before giving up. 271 uint8_t eight_pel_search_enabled; // 0: OFF; 1: ON 272 SUBPEL_SEARCH_METHODS subpel_search_method; // Subpel_search_method can only be subpel_tree which does a subpixel 273 // logarithmic search that keeps stepping at 1/2 pixel units until 274 // you stop getting a gain, and then goes on to 1/4 and repeats 275 // the same process. Along the way it skips many diagonals. 276 } MdSubPelSearchCtrls; 277 typedef struct ParentSqCoeffAreaBasedCyclesReductionCtrls { 278 EbBool enabled; 279 280 uint8_t high_freq_band1_th; // cutoff for the highest coeff-area band [0-100] 281 uint8_t high_freq_band1_level; // level of action to use if luma coeff-area of parent SQ is >= high_freq_band1_th 282 uint8_t high_freq_band2_th; // cutoff for the second high coeff-area band [0-100]; should be less than high_freq_band1_th 283 uint8_t high_freq_band2_level; // level of action to use if luma coeff-area of parent SQ is >= high_freq_band2_th 284 uint8_t high_freq_band3_th; // cutoff for the third high coeff-area band [0-100]; should be less than high_freq_band2_th 285 uint8_t high_freq_band3_level; // level of action to use if luma coeff-area of parent SQ is >= high_freq_band3_th 286 287 uint8_t enable_zero_coeff_action; // enable for whether to apply action when parent SQ has 0 luma coefficients 288 uint8_t zero_coeff_action; // level of action to use if parent SQ has 0 luma coeffs 289 uint8_t enable_one_coeff_action; // enable for whether to apply action when parent SQ has 1 luma coefficients 290 uint8_t one_coeff_action; // level of action to use if parent SQ has 1 luma coeff 291 292 uint8_t low_freq_band1_th; // cutoff for the lowest coeff-area band [0-100]; should be less than high_freq_band2_th 293 uint8_t low_freq_band1_level; // level of action to use if luma coeff-area of parent SQ is < low_freq_band1_th 294 uint8_t low_freq_band2_th; // cutoff for the lowest coeff-area band [0-100]; should be less than high_freq_band2_th and larger than low_freq_band1_th 295 uint8_t low_freq_band2_level; // level of action to use if luma coeff-area of parent SQ is < low_freq_band2_th 296 }ParentSqCoeffAreaBasedCyclesReductionCtrls; 297 typedef struct RdoqCtrls { 298 uint8_t enabled; 299 300 uint8_t 301 eob_fast_l_inter; // 0: do not use eob_fast for luma inter; 1: use eob_fast for luma inter 302 uint8_t 303 eob_fast_l_intra; // 0: do not use eob_fast for luma intra; 1: use eob_fast for luma intra 304 uint8_t 305 eob_fast_c_inter; // 0: do not use eob_fast for chroma inter; 1: use eob_fast for chroma inter 306 uint8_t 307 eob_fast_c_intra; // 0: do not use eob_fast for chroma intra; 1: use eob_fast for chroma intra 308 uint8_t fp_q_l; // 0: use default quant for luma; 1: use fp_quant for luma 309 uint8_t fp_q_c; // 0: use default quant for chroma; 1: use fp_quant for chroma 310 uint8_t satd_factor; // do not perform rdoq if the tx satd > satd_factor 311 uint8_t 312 early_exit_th; // do not perform rdoq based on an early skip/non-skip cost, threshold for early exit is 5 313 uint8_t disallow_md_rdoq_uv; 314 uint8_t md_satd_factor; 315 } RdoqCtrls; 316 typedef struct NicCtrls { 317 uint8_t stage1_scaling_num; // Scaling numerator for post-stage 0 NICS: <x>/16 318 uint8_t stage2_scaling_num; // Scaling numerator for post-stage 1 NICS: <x>/16 319 uint8_t stage3_scaling_num; // Scaling numerator for post-stage 2 NICS: <x>/16 320 } NicCtrls; 321 typedef struct NicPruningCtrls { 322 323 // class pruning signal(s) 324 // mdsx_class_th (for class removal); reduce cand if deviation to the best_cand is higher than mdsx_cand_th 325 326 // All bands (except the last) are derived as follows: 327 // For band_index=0 to band_index=(mdsx_band_cnt-2), 328 // band=[band_index*band_width, (band_index+1)*band_width]; band_width = mdsx_class_th/(band_cnt-1) 329 // multiplier= 1 / ((band_index+1)*2) 330 // Last band is [mds1_class_th, +?] = kill (nic=0) 331 332 // e.g. mds1_class_th=20 and mds1_band_cnt=3 333 // band_index |0 |1 | 2 | 334 // band |0 to 10 |10 to 20 | 20 to +?| 335 // action |nic * 1 |nic * 1/2| nic * 0| 336 337 // Post mds0 338 uint64_t mds1_class_th; 339 uint8_t mds1_band_cnt; // >=2 340 341 // Post mds1 342 uint64_t mds2_class_th; 343 uint8_t mds2_band_cnt; // >=2 344 345 // Post mds2 346 uint64_t mds3_class_th; 347 uint8_t mds3_band_cnt; // >=2 348 349 // cand pruning signal(s) 350 // mdsx_cand_th (for single cand removal per class); remove cand if deviation to the best_cand for @ the target class is higher than mdsx_cand_th 351 // mdsx_cand_th = base_th + sq_offset_th + intra_class_offset_th 352 353 // Post mds0 354 uint64_t mds1_cand_base_th; // base_th 355 356 357 // Post mds1 358 uint64_t mds2_cand_base_th; 359 360 // Post mds2 361 uint64_t mds3_cand_base_th; 362 363 } NicPruningCtrls; 364 typedef struct CandEliminationCtlrs { 365 uint32_t enabled; 366 uint8_t dc_only; 367 uint8_t inject_new_me; 368 uint8_t inject_new_pme; 369 uint8_t inject_new_warp; 370 }CandEliminationCtlrs; 371 typedef struct ModeDecisionContext { 372 EbDctor dctor; 373 EbFifo * mode_decision_configuration_input_fifo_ptr; 374 EbFifo * mode_decision_output_fifo_ptr; 375 int16_t *transform_inner_array_ptr; 376 377 ModeDecisionCandidate ** fast_candidate_ptr_array; 378 ModeDecisionCandidate * fast_candidate_array; 379 ModeDecisionCandidateBuffer ** candidate_buffer_ptr_array; 380 ModeDecisionCandidateBuffer * candidate_buffer_tx_depth_1; 381 ModeDecisionCandidateBuffer * candidate_buffer_tx_depth_2; 382 MdRateEstimationContext * md_rate_estimation_ptr; 383 EbBool is_md_rate_estimation_ptr_owner; 384 struct MdRateEstimationContext rate_est_table; 385 InterPredictionContext * inter_prediction_context; 386 MdBlkStruct * md_local_blk_unit; 387 BlkStruct * md_blk_arr_nsq; 388 uint8_t * avail_blk_flag; 389 uint8_t* tested_blk_flag; //tells whether this CU is tested in MD. 390 uint8_t* do_not_process_blk; 391 MdcSbData * mdc_sb_array; 392 393 NeighborArrayUnit *intra_luma_mode_neighbor_array; 394 NeighborArrayUnit *skip_flag_neighbor_array; 395 NeighborArrayUnit *mode_type_neighbor_array; 396 NeighborArrayUnit *luma_recon_neighbor_array; 397 NeighborArrayUnit *cb_recon_neighbor_array; 398 NeighborArrayUnit *cr_recon_neighbor_array; 399 NeighborArrayUnit *tx_search_luma_recon_neighbor_array; 400 NeighborArrayUnit *luma_recon_neighbor_array16bit; 401 NeighborArrayUnit *cb_recon_neighbor_array16bit; 402 NeighborArrayUnit *cr_recon_neighbor_array16bit; 403 NeighborArrayUnit *tx_search_luma_recon_neighbor_array16bit; 404 NeighborArrayUnit * 405 luma_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits (COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1 406 NeighborArrayUnit * 407 full_loop_luma_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits (COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1 408 NeighborArrayUnit * 409 tx_search_luma_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits (COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1 410 NeighborArrayUnit * 411 cr_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits(COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1 412 NeighborArrayUnit * 413 cb_dc_sign_level_coeff_neighbor_array; // Stored per 4x4. 8 bit: lower 6 bits(COEFF_CONTEXT_BITS), shows if there is at least one Coef. Top 2 bit store the sign of DC as follow: 0->0,1->-1,2-> 1 414 NeighborArrayUnit * txfm_context_array; 415 NeighborArrayUnit * ref_frame_type_neighbor_array; 416 NeighborArrayUnit * leaf_partition_neighbor_array; 417 NeighborArrayUnit32 *interpolation_type_neighbor_array; 418 419 // Transform and Quantization Buffers 420 EbTransQuantBuffers * trans_quant_buffers_ptr; 421 struct EncDecContext *enc_dec_context_ptr; 422 423 uint64_t *fast_cost_array; 424 uint64_t *full_cost_array; 425 uint64_t *full_cost_skip_ptr; 426 uint64_t *full_cost_merge_ptr; 427 // Lambda 428 uint32_t fast_lambda_md[2]; 429 uint32_t full_lambda_md[2]; 430 uint32_t full_sb_lambda_md 431 [2]; // for the case of lambda modulation (blk_lambda_tuning), full_lambda_md/fast_lambda_md corresponds 432 // to block lambda and full_sb_lambda_md is the full lambda per sb 433 EbBool blk_lambda_tuning; 434 // Context Variables--------------------------------- 435 SuperBlock * sb_ptr; 436 BlkStruct * blk_ptr; 437 const BlockGeom *blk_geom; 438 PredictionUnit * pu_ptr; 439 MvUnit mv_unit; 440 PALETTE_BUFFER palette_buffer; 441 PaletteInfo palette_cand_array[MAX_PAL_CAND]; 442 // Entropy Coder 443 MdEncPassCuData *md_ep_pipe_sb; 444 445 uint8_t sb64_sq_no4xn_geom; //simple geometry 64x64SB, Sq only, no 4xN 446 uint8_t pu_itr; 447 uint32_t *best_candidate_index_array; 448 uint16_t blk_origin_x; 449 uint16_t blk_origin_y; 450 uint32_t sb_origin_x; 451 uint32_t sb_origin_y; 452 uint32_t round_origin_x; 453 uint32_t round_origin_y; 454 uint16_t pu_origin_x; 455 uint16_t pu_origin_y; 456 uint16_t pu_width; 457 uint16_t pu_height; 458 EbPfMode pf_md_mode; 459 uint8_t hbd_mode_decision; 460 uint8_t qp_index; 461 uint64_t three_quad_energy; 462 uint32_t txb_1d_offset; 463 EbBool uv_intra_comp_only; 464 UvPredictionMode best_uv_mode[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1]; 465 int32_t best_uv_angle[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1]; 466 uint64_t best_uv_cost[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1]; 467 uint64_t fast_luma_rate[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1]; 468 uint64_t fast_chroma_rate[UV_PAETH_PRED + 1][(MAX_ANGLE_DELTA << 1) + 1]; 469 // Needed for DC prediction 470 int32_t is_inter_ctx; 471 uint8_t intra_luma_left_mode; 472 uint8_t intra_luma_top_mode; 473 EB_ALIGN(64) 474 int16_t pred_buf_q3 475 [CFL_BUF_SQUARE]; // Hsan: both MD and EP to use pred_buf_q3 (kept 1, and removed the 2nd) 476 uint8_t injected_ref_type_l0_array 477 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 478 uint8_t injected_ref_type_l1_array 479 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 480 uint8_t injected_ref_type_bipred_array 481 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 482 int16_t injected_mv_x_l0_array 483 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 484 int16_t injected_mv_y_l0_array 485 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 486 uint8_t injected_mv_count_l0; 487 488 int16_t injected_mv_x_l1_array 489 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 490 int16_t injected_mv_y_l1_array 491 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 492 uint8_t injected_mv_count_l1; 493 494 int16_t injected_mv_x_bipred_l0_array 495 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 496 int16_t injected_mv_y_bipred_l0_array 497 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 498 int16_t injected_mv_x_bipred_l1_array 499 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 500 int16_t injected_mv_y_bipred_l1_array 501 [MODE_DECISION_CANDIDATE_MAX_COUNT]; // used to do not inject existing MV 502 uint8_t injected_mv_count_bipred; 503 uint32_t fast_candidate_inter_count; 504 uint32_t me_block_offset; 505 uint32_t me_cand_offset; 506 EbPictureBufferDesc *cfl_temp_prediction_ptr; 507 EbPictureBufferDesc 508 * residual_quant_coeff_ptr; // One buffer for residual and quantized coefficient 509 uint8_t tx_depth; 510 uint8_t txb_itr; 511 uint32_t me_sb_addr; 512 uint32_t geom_offset_x; 513 uint32_t geom_offset_y; 514 int16_t luma_txb_skip_context; 515 int16_t luma_dc_sign_context; 516 int16_t cb_txb_skip_context; 517 int16_t cb_dc_sign_context; 518 int16_t cr_txb_skip_context; 519 int16_t cr_dc_sign_context; 520 // Multi-modes signal(s) 521 uint8_t parent_sq_type[MAX_PARENT_SQ]; 522 uint8_t parent_sq_pred_mode[MAX_PARENT_SQ]; 523 uint8_t chroma_level; 524 uint8_t chroma_at_last_md_stage; 525 uint64_t chroma_at_last_md_stage_intra_th; 526 uint64_t chroma_at_last_md_stage_cfl_th; 527 uint8_t global_mv_injection; 528 uint8_t new_nearest_injection; 529 uint8_t new_nearest_near_comb_injection; 530 uint8_t warped_motion_injection; 531 uint8_t unipred3x3_injection; 532 uint8_t bipred3x3_injection; 533 uint8_t redundant_blk; 534 uint8_t nic_level; 535 uint8_t similar_blk_avail; 536 uint16_t similar_blk_mds; 537 uint8_t inject_inter_candidates; 538 uint8_t * cfl_temp_luma_recon; 539 uint16_t * cfl_temp_luma_recon16bit; 540 EbBool spatial_sse_full_loop_level; 541 EbBool blk_skip_decision; 542 int8_t rdoq_level; 543 int16_t sb_me_mv[BLOCK_MAX_COUNT_SB_128][MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX][2]; 544 MV fp_me_mv[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 545 MV sub_me_mv[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 546 uint32_t post_subpel_me_mv_cost[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 547 int16_t best_pme_mv[MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX][2]; 548 int8_t valid_pme_mv[MAX_NUM_OF_REF_PIC_LIST][MAX_REF_IDX]; 549 EbPictureBufferDesc *input_sample16bit_buffer; 550 uint16_t tile_index; 551 DECLARE_ALIGNED(16, uint8_t, pred0[2 * MAX_SB_SQUARE]); 552 DECLARE_ALIGNED(16, uint8_t, pred1[2 * MAX_SB_SQUARE]); 553 DECLARE_ALIGNED(32, int16_t, residual1[MAX_SB_SQUARE]); 554 DECLARE_ALIGNED(32, int16_t, diff10[MAX_SB_SQUARE]); 555 unsigned int prediction_mse; 556 MdStage md_stage; 557 uint32_t *cand_buff_indices[CAND_CLASS_TOTAL]; 558 uint8_t md_staging_mode; 559 uint8_t bypass_md_stage_1[CAND_CLASS_TOTAL]; 560 uint8_t bypass_md_stage_2[CAND_CLASS_TOTAL]; 561 uint32_t md_stage_0_count[CAND_CLASS_TOTAL]; 562 uint32_t md_stage_1_count[CAND_CLASS_TOTAL]; 563 uint32_t md_stage_2_count[CAND_CLASS_TOTAL]; 564 uint32_t md_stage_3_count[CAND_CLASS_TOTAL]; 565 uint32_t md_stage_1_total_count; 566 uint32_t md_stage_2_total_count; 567 uint32_t md_stage_3_total_count; 568 uint32_t md_stage_3_total_intra_count; 569 uint64_t best_intra_cost; 570 uint64_t best_inter_cost; 571 CandClass target_class; 572 573 // fast_loop_core signals 574 EbBool md_staging_skip_interpolation_search; 575 EbBool md_staging_skip_chroma_pred; 576 // full_loop_core signals 577 EbBool 578 md_staging_perform_inter_pred; // 0: perform luma & chroma prediction + interpolation search, 2: nothing (use information from previous stages) 579 EbBool md_staging_tx_size_mode; // 0: Tx Size recon only, 1:Tx Size search and recon 580 EbBool md_staging_txt_level; 581 EbBool md_staging_skip_full_chroma; 582 EbBool md_staging_skip_rdoq; 583 EbBool md_staging_spatial_sse_full_loop_level; 584 EbBool md_staging_perform_intra_chroma_pred; 585 DECLARE_ALIGNED( 586 16, uint8_t, 587 intrapred_buf[INTERINTRA_MODES][2 * 32 * 32]); //MAX block size for inter intra is 32x32 588 uint64_t *ref_best_cost_sq_table; 589 uint32_t *ref_best_ref_sq_table; 590 DECLARE_ALIGNED(16, uint8_t, obmc_buff_0[2 * 2 * MAX_MB_PLANE * MAX_SB_SQUARE]); 591 DECLARE_ALIGNED(16, uint8_t, obmc_buff_1[2 * 2 * MAX_MB_PLANE * MAX_SB_SQUARE]); 592 DECLARE_ALIGNED(16, uint8_t, obmc_buff_0_8b[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); 593 DECLARE_ALIGNED(16, uint8_t, obmc_buff_1_8b[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); 594 DECLARE_ALIGNED(16, int32_t, wsrc_buf[MAX_SB_SQUARE]); 595 DECLARE_ALIGNED(16, int32_t, mask_buf[MAX_SB_SQUARE]); 596 unsigned int pred_sse[REF_FRAMES]; 597 uint8_t * above_txfm_context; 598 uint8_t * left_txfm_context; 599 // square cost weighting for deciding if a/b shapes could be skipped 600 uint32_t sq_weight; 601 uint32_t max_part0_to_part1_dev; 602 // signal for enabling shortcut to skip search depths 603 uint8_t dc_cand_only_flag; 604 EbBool disable_angle_z2_intra_flag; 605 uint8_t shut_skip_ctx_dc_sign_update; 606 uint8_t shut_fast_rate; // use coeff rate and slipt flag rate only (no MVP derivation) 607 uint8_t fast_coeff_est_level; // estimate the rate of the first (eob/N) coeff(s) and last coeff only 608 uint8_t interpolation_search_level; 609 uint8_t md_tx_size_search_mode; 610 uint8_t md_pic_obmc_level; 611 uint8_t md_enable_paeth; 612 uint8_t md_enable_smooth; 613 uint8_t md_inter_intra_level; 614 uint8_t md_filter_intra_level; 615 uint8_t md_intra_angle_delta; 616 uint8_t md_allow_intrabc; 617 uint8_t md_palette_level; 618 uint8_t dist_based_ref_pruning; 619 DepthRemovalCtrls depth_removal_ctrls; 620 InDepthBlockSkipCtrls in_depth_block_skip_ctrls; 621 DepthCtrls depth_ctrls; // control which depths can be considered in PD1 622 LowerDepthBlockSkipCtrls lower_depth_block_skip_ctrls; 623 DepthRefinementCtrls depth_refinement_ctrls; 624 int64_t parent_to_current_deviation; 625 int64_t child_to_current_deviation; 626 uint8_t pf_level; 627 PfCtrls pf_ctrls; 628 uint8_t md_exit_th; 629 // Control signals for MD sparse search (used for increasing ME search for active clips) 630 uint8_t md_sq_mv_search_level; 631 MdSqMotionSearchCtrls md_sq_me_ctrls; 632 uint8_t md_nsq_mv_search_level; 633 MdNsqMotionSearchCtrls md_nsq_motion_search_ctrls; 634 uint8_t md_pme_level; 635 MdPmeCtrls md_pme_ctrls; 636 uint8_t md_subpel_me_level; 637 MdSubPelSearchCtrls md_subpel_me_ctrls; 638 uint8_t md_subpel_pme_level; 639 MdSubPelSearchCtrls md_subpel_pme_ctrls; 640 RefResults pme_res[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 641 ObmcControls obmc_ctrls; 642 InterCompCtrls inter_comp_ctrls; 643 InterIntraCompCtrls inter_intra_comp_ctrls; 644 RefResults ref_filtering_res[TOT_INTER_GROUP][MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 645 RefPruningControls ref_pruning_ctrls; 646 // Signal to control initial and final pass PD setting(s) 647 PdPass pd_pass; 648 649 EbBool md_disable_cfl; 650 TxtControls txt_ctrls; 651 NearCountCtrls near_count_ctrls; 652 RdoqCtrls rdoq_ctrls; 653 uint8_t disallow_4x4; 654 uint8_t md_disallow_nsq; 655 uint64_t best_nsq_default_cost; 656 uint64_t default_cost_per_shape[NUMBER_OF_SHAPES]; 657 ParentSqCoeffAreaBasedCyclesReductionCtrls parent_sq_coeff_area_based_cycles_reduction_ctrls; 658 uint8_t sb_size; 659 660 EbPictureBufferDesc *recon_coeff_ptr[TX_TYPES]; 661 EbPictureBufferDesc *recon_ptr[TX_TYPES]; 662 uint8_t skip_intra; 663 EbPictureBufferDesc *temp_residual_ptr; 664 EbPictureBufferDesc *temp_recon_ptr; 665 // Array for all nearest/near MVs for a block for single ref case 666 MV mvp_array[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH][MAX_MVP_CANIDATES]; 667 // Count of all nearest/near MVs for a block for single ref case 668 int8_t mvp_count[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 669 // Start/end position for MD sparse search 670 int16_t sprs_lev0_start_x; 671 int16_t sprs_lev0_end_x; 672 int16_t sprs_lev0_start_y; 673 int16_t sprs_lev0_end_y; 674 675 uint8_t md_staging_tx_size_level; 676 NicCtrls nic_ctrls; 677 NicPruningCtrls nic_pruning_ctrls; 678 uint8_t inter_compound_mode; 679 MV ref_mv; 680 uint8_t ifs_is_regular_last; // If regular is last performed interp_filters @ IFS 681 uint8_t use_prev_mds_res; 682 uint16_t sb_index; 683 uint8_t early_cand_elimination; 684 uint64_t mds0_best_cost; 685 uint8_t mds0_best_class; 686 uint8_t reduce_last_md_stage_candidate; 687 uint32_t mds0_best_idx; 688 CandClass mds0_best_class_it; 689 uint32_t mds1_best_idx; 690 CandClass mds1_best_class_it; 691 uint8_t use_var_in_mds0; 692 uint32_t md_me_cost[MAX_NUM_OF_REF_PIC_LIST][REF_LIST_MAX_DEPTH]; 693 uint32_t md_me_dist; 694 uint8_t inject_new_me; 695 uint8_t inject_new_pme; 696 uint8_t inject_new_warp; 697 uint8_t merge_inter_classes; 698 uint8_t bypass_tx_search_when_zcoef; 699 uint64_t estimate_ref_frames_num_bits[MODE_CTX_REF_FRAMES][2]; // [TOTAL_REFS_PER_FRAME + 1][is_compound] 700 CandEliminationCtlrs cand_elimination_ctrs; 701 uint32_t early_txt_search_exit_level; // should be moved to txt_ctrls 702 uint8_t ep_use_md_skip_decision; 703 uint32_t max_nics ; // Maximum number of candidates MD can support 704 uint32_t max_nics_uv ; // Maximum number of candidates MD can support 705 uint8_t use_best_mds0; 706 } ModeDecisionContext; 707 708 typedef void (*EbAv1LambdaAssignFunc)(PictureControlSet *pcs_ptr, uint32_t *fast_lambda, 709 uint32_t *full_lambda, uint8_t bit_depth, uint16_t qp_index, 710 EbBool multiply_lambda); 711 712 /************************************** 713 * Extern Function Declarations 714 **************************************/ 715 extern EbErrorType mode_decision_context_ctor(ModeDecisionContext *context_ptr, 716 EbColorFormat color_format, uint8_t sb_size, 717 uint8_t enc_mode, 718 EbFifo *mode_decision_configuration_input_fifo_ptr, 719 EbFifo *mode_decision_output_fifo_ptr, 720 uint8_t enable_hbd_mode_decision, 721 uint8_t cfg_palette); 722 723 extern const EbAv1LambdaAssignFunc av1_lambda_assignment_function_table[4]; 724 725 // Table that converts 0-63 Q-range values passed in outside to the Qindex 726 // range used internally. 727 static const uint8_t quantizer_to_qindex[] = { 728 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 729 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 730 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 731 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255}; 732 733 extern void reset_mode_decision(SequenceControlSet *scs_ptr, ModeDecisionContext *context_ptr, 734 PictureControlSet *pcs_ptr, uint16_t tile_row_idx, 735 uint32_t segment_index); 736 737 extern void mode_decision_configure_sb(ModeDecisionContext *context_ptr, PictureControlSet *pcs_ptr, 738 uint8_t sb_qp); 739 extern void md_cfl_rd_pick_alpha(PictureControlSet * pcs_ptr, 740 ModeDecisionCandidateBuffer *candidate_buffer, SuperBlock *sb_ptr, 741 ModeDecisionContext *context_ptr, 742 EbPictureBufferDesc *input_picture_ptr, 743 uint32_t input_cb_origin_in_index, 744 uint32_t blk_chroma_origin_index); 745 746 #ifdef __cplusplus 747 } 748 #endif 749 #endif // EbModeDecisionProcess_h 750