1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 * Min Chen <chenm003@163.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24
25 #include "common.h"
26 #include "framedata.h"
27 #include "scalinglist.h"
28 #include "quant.h"
29 #include "contexts.h"
30 #include "picyuv.h"
31
32 #include "sao.h"
33 #include "entropy.h"
34
35 #define CU_DQP_TU_CMAX 5 // max number bins for truncated unary
36 #define CU_DQP_EG_k 0 // exp-golomb order
37 #define START_VALUE 8 // start value for dpcm mode
38
39 namespace X265_NS {
40
41 // initial probability for cu_transquant_bypass flag
42 static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43 {
44 { 154 },
45 { 154 },
46 { 154 },
47 };
48
49 // initial probability for split flag
50 static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51 {
52 { 107, 139, 126, },
53 { 107, 139, 126, },
54 { 139, 141, 157, },
55 };
56
57 static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58 {
59 { 197, 185, 201, },
60 { 197, 185, 201, },
61 { CNU, CNU, CNU, },
62 };
63
64 static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65 {
66 { 154, },
67 { 110, },
68 { CNU, },
69 };
70
71 static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72 {
73 { 137, },
74 { 122, },
75 { CNU, },
76 };
77
78 static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79 {
80 { 154, 139, 154, 154 },
81 { 154, 139, 154, 154 },
82 { 184, CNU, CNU, CNU },
83 };
84
85 static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86 {
87 { 134, },
88 { 149, },
89 { CNU, },
90 };
91
92 static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93 {
94 { 183, },
95 { 154, },
96 { 184, },
97 };
98
99 static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100 {
101 { 152, 139, },
102 { 152, 139, },
103 { 63, 139, },
104 };
105
106 static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107 {
108 { 95, 79, 63, 31, 31, },
109 { 95, 79, 63, 31, 31, },
110 { CNU, CNU, CNU, CNU, CNU, },
111 };
112
113 static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114 {
115 { 169, 198, },
116 { 140, 198, },
117 { CNU, CNU, },
118 };
119
120 static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121 {
122 { 153, 153 },
123 { 153, 153 },
124 { CNU, CNU },
125 };
126
127 static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128 {
129 { 154, 154, 154, },
130 { 154, 154, 154, },
131 { 154, 154, 154, },
132 };
133
134 static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135 {
136 { 153, 111, 149, 92, 167, 154, 154 },
137 { 153, 111, 149, 107, 167, 154, 154 },
138 { 111, 141, 94, 138, 182, 154, 154 },
139 };
140
141 static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142 {
143 { 79, },
144 { 79, },
145 { CNU, },
146 };
147
148 static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149 {
150 { 125, 110, 124, 110, 95, 94, 125, 111, 111, 79, 125, 126, 111, 111, 79,
151 108, 123, 93 },
152 { 125, 110, 94, 110, 95, 79, 125, 111, 110, 78, 110, 111, 111, 95, 94,
153 108, 123, 108 },
154 { 110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111, 79,
155 108, 123, 63 },
156 };
157
158 static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159 {
160 { 121, 140,
161 61, 154, },
162 { 121, 140,
163 61, 154, },
164 { 91, 171,
165 134, 141, },
166 };
167
168 static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169 {
170 { 170, 154, 139, 153, 139, 123, 123, 63, 124, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 138, 138, 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140, },
171 { 155, 154, 139, 153, 139, 123, 123, 63, 153, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170, 153, 123, 123, 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140, },
172 { 111, 111, 125, 110, 110, 94, 124, 108, 124, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 140, 139, 182, 182, 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111, },
173 };
174
175 static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176 {
177 { 154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121, 136, 122, 169, 208, 166, 167, 154, 152, 167, 182, },
178 { 154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121, 136, 137, 169, 194, 166, 167, 154, 167, 137, 182, },
179 { 140, 92, 137, 138, 140, 152, 138, 139, 153, 74, 149, 92, 139, 107, 122, 152, 140, 179, 166, 182, 140, 227, 122, 197, },
180 };
181
182 static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183 {
184 { 107, 167, 91, 107, 107, 167, },
185 { 107, 167, 91, 122, 107, 167, },
186 { 138, 153, 136, 167, 152, 152, },
187 };
188
189 static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190 {
191 { 168 },
192 { 168 },
193 { CNU },
194 };
195
196 static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197 {
198 { 153, },
199 { 153, },
200 { 153, },
201 };
202
203 static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204 {
205 { 160, },
206 { 185, },
207 { 200, },
208 };
209
210 static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211 {
212 { 224, 167, 122, },
213 { 124, 138, 94, },
214 { 153, 138, 138, },
215 };
216
217 static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218 {
219 { 139, 139 },
220 { 139, 139 },
221 { 139, 139 },
222 };
223
Entropy()224 Entropy::Entropy()
225 {
226 markValid();
227 m_fracBits = 0;
228 m_pad = 0;
229 m_meanQP = 0;
230 X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
231 }
232
codeVPS(const VPS & vps)233 void Entropy::codeVPS(const VPS& vps)
234 {
235 WRITE_CODE(0, 4, "vps_video_parameter_set_id");
236 WRITE_CODE(3, 2, "vps_reserved_three_2bits");
237 WRITE_CODE(0, 6, "vps_reserved_zero_6bits");
238 WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
239 WRITE_FLAG(vps.maxTempSubLayers == 1, "vps_temporal_id_nesting_flag");
240 WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
241
242 codeProfileTier(vps.ptl, vps.maxTempSubLayers);
243
244 WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
245
246 for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
247 {
248 WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
249 WRITE_UVLC(vps.numReorderPics, "vps_num_reorder_pics[i]");
250 WRITE_UVLC(vps.maxLatencyIncrease + 1, "vps_max_latency_increase_plus1[i]");
251 }
252
253 WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
254 WRITE_UVLC(0, "vps_max_op_sets_minus1");
255 WRITE_FLAG(0, "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
256 WRITE_FLAG(0, "vps_extension_flag");
257 }
258
codeSPS(const SPS & sps,const ScalingList & scalingList,const ProfileTierLevel & ptl)259 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
260 {
261 WRITE_CODE(0, 4, "sps_video_parameter_set_id");
262 WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
263 WRITE_FLAG(sps.maxTempSubLayers == 1, "sps_temporal_id_nesting_flag");
264
265 codeProfileTier(ptl, sps.maxTempSubLayers);
266
267 WRITE_UVLC(0, "sps_seq_parameter_set_id");
268 WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
269
270 if (sps.chromaFormatIdc == X265_CSP_I444)
271 WRITE_FLAG(0, "separate_colour_plane_flag");
272
273 WRITE_UVLC(sps.picWidthInLumaSamples, "pic_width_in_luma_samples");
274 WRITE_UVLC(sps.picHeightInLumaSamples, "pic_height_in_luma_samples");
275
276 const Window& conf = sps.conformanceWindow;
277 WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
278 if (conf.bEnabled)
279 {
280 int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
281 WRITE_UVLC(conf.leftOffset >> hShift, "conf_win_left_offset");
282 WRITE_UVLC(conf.rightOffset >> hShift, "conf_win_right_offset");
283 WRITE_UVLC(conf.topOffset >> vShift, "conf_win_top_offset");
284 WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
285 }
286
287 WRITE_UVLC(X265_DEPTH - 8, "bit_depth_luma_minus8");
288 WRITE_UVLC(X265_DEPTH - 8, "bit_depth_chroma_minus8");
289 WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
290 WRITE_FLAG(true, "sps_sub_layer_ordering_info_present_flag");
291
292 for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
293 {
294 WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
295 WRITE_UVLC(sps.numReorderPics, "sps_num_reorder_pics[i]");
296 WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
297 }
298
299 WRITE_UVLC(sps.log2MinCodingBlockSize - 3, "log2_min_coding_block_size_minus3");
300 WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
301 WRITE_UVLC(sps.quadtreeTULog2MinSize - 2, "log2_min_transform_block_size_minus2");
302 WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
303 WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1, "max_transform_hierarchy_depth_inter");
304 WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1, "max_transform_hierarchy_depth_intra");
305 WRITE_FLAG(scalingList.m_bEnabled, "scaling_list_enabled_flag");
306 if (scalingList.m_bEnabled)
307 {
308 WRITE_FLAG(scalingList.m_bDataPresent, "sps_scaling_list_data_present_flag");
309 if (scalingList.m_bDataPresent)
310 codeScalingList(scalingList);
311 }
312 WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
313 WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
314
315 WRITE_FLAG(0, "pcm_enabled_flag");
316 WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
317 for (int i = 0; i < sps.spsrpsNum; i++)
318 codeShortTermRefPicSet(sps.spsrps[i], i);
319 WRITE_FLAG(0, "long_term_ref_pics_present_flag");
320
321 WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
322 WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
323
324 WRITE_FLAG(1, "vui_parameters_present_flag");
325 codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo);
326
327 WRITE_FLAG(0, "sps_extension_flag");
328 }
329
codePPS(const PPS & pps,bool filerAcross,int iPPSInitQpMinus26)330 void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26 )
331 {
332 WRITE_UVLC(0, "pps_pic_parameter_set_id");
333 WRITE_UVLC(0, "pps_seq_parameter_set_id");
334 WRITE_FLAG(0, "dependent_slice_segments_enabled_flag");
335 WRITE_FLAG(0, "output_flag_present_flag");
336 WRITE_CODE(0, 3, "num_extra_slice_header_bits");
337 WRITE_FLAG(pps.bSignHideEnabled, "sign_data_hiding_flag");
338 WRITE_FLAG(0, "cabac_init_present_flag");
339 WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
340 WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
341
342 WRITE_SVLC(iPPSInitQpMinus26, "init_qp_minus26");
343 WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
344 WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
345
346 WRITE_FLAG(pps.bUseDQP, "cu_qp_delta_enabled_flag");
347 if (pps.bUseDQP)
348 WRITE_UVLC(pps.maxCuDQPDepth, "diff_cu_qp_delta_depth");
349
350 WRITE_SVLC(pps.chromaQpOffset[0], "pps_cb_qp_offset");
351 WRITE_SVLC(pps.chromaQpOffset[1], "pps_cr_qp_offset");
352 WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
353
354 WRITE_FLAG(pps.bUseWeightPred, "weighted_pred_flag");
355 WRITE_FLAG(pps.bUseWeightedBiPred, "weighted_bipred_flag");
356 WRITE_FLAG(pps.bTransquantBypassEnabled, "transquant_bypass_enable_flag");
357 WRITE_FLAG(0, "tiles_enabled_flag");
358 WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
359 WRITE_FLAG(filerAcross, "loop_filter_across_slices_enabled_flag");
360
361 WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
362 if (pps.bDeblockingFilterControlPresent)
363 {
364 WRITE_FLAG(0, "deblocking_filter_override_enabled_flag");
365 WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
366 if (!pps.bPicDisableDeblockingFilter)
367 {
368 WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
369 WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2, "pps_tc_offset_div2");
370 }
371 }
372
373 WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
374 WRITE_FLAG(0, "lists_modification_present_flag");
375 WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
376 WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
377 WRITE_FLAG(0, "pps_extension_flag");
378 }
379
codeProfileTier(const ProfileTierLevel & ptl,int maxTempSubLayers)380 void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers)
381 {
382 WRITE_CODE(0, 2, "XXX_profile_space[]");
383 WRITE_FLAG(ptl.tierFlag, "XXX_tier_flag[]");
384 WRITE_CODE(ptl.profileIdc, 5, "XXX_profile_idc[]");
385 for (int j = 0; j < 32; j++)
386 WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
387
388 WRITE_FLAG(ptl.progressiveSourceFlag, "general_progressive_source_flag");
389 WRITE_FLAG(ptl.interlacedSourceFlag, "general_interlaced_source_flag");
390 WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
391 WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
392
393 if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
394 {
395 uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
396 int csp = ptl.chromaFormatConstraint;
397 WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
398 WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
399 WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
400 WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
401 WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_420chroma_constraint_flag");
402 WRITE_FLAG(csp == X265_CSP_I400, "general_max_monochrome_constraint_flag");
403 WRITE_FLAG(ptl.intraConstraintFlag, "general_intra_constraint_flag");
404 WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
405 WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
406 WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
407 WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
408 WRITE_CODE(0 , 3, "XXX_reserved_zero_35bits[32..34]");
409 }
410 else
411 {
412 WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
413 WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
414 WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
415 }
416
417 WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
418
419 if (maxTempSubLayers > 1)
420 {
421 WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
422 WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
423 for (int i = maxTempSubLayers - 1; i < 8 ; i++)
424 WRITE_CODE(0, 2, "reserved_zero_2bits");
425 }
426 }
427
codeVUI(const VUI & vui,int maxSubTLayers,bool bEmitVUITimingInfo,bool bEmitVUIHRDInfo)428 void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo)
429 {
430 WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
431 if (vui.aspectRatioInfoPresentFlag)
432 {
433 WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
434 if (vui.aspectRatioIdc == 255)
435 {
436 WRITE_CODE(vui.sarWidth, 16, "sar_width");
437 WRITE_CODE(vui.sarHeight, 16, "sar_height");
438 }
439 }
440
441 WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
442 if (vui.overscanInfoPresentFlag)
443 WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
444
445 WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
446 if (vui.videoSignalTypePresentFlag)
447 {
448 WRITE_CODE(vui.videoFormat, 3, "video_format");
449 WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
450 WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
451 if (vui.colourDescriptionPresentFlag)
452 {
453 WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
454 WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
455 WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
456 }
457 }
458
459 WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
460 if (vui.chromaLocInfoPresentFlag)
461 {
462 WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
463 WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
464 }
465
466 WRITE_FLAG(0, "neutral_chroma_indication_flag");
467 WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
468 WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
469
470 WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
471 if (vui.defaultDisplayWindow.bEnabled)
472 {
473 WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
474 WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
475 WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
476 WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
477 }
478
479 if (!bEmitVUITimingInfo)
480 WRITE_FLAG(0, "vui_timing_info_present_flag");
481 else
482 {
483 WRITE_FLAG(1, "vui_timing_info_present_flag");
484 WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
485 WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
486 WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
487 }
488
489 if (!bEmitVUIHRDInfo)
490 WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
491 else
492 {
493 WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
494 if (vui.hrdParametersPresentFlag)
495 codeHrdParameters(vui.hrdParameters, maxSubTLayers);
496 }
497
498 WRITE_FLAG(0, "bitstream_restriction_flag");
499 }
500
codeScalingList(const ScalingList & scalingList)501 void Entropy::codeScalingList(const ScalingList& scalingList)
502 {
503 for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
504 {
505 for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
506 {
507 int predList = scalingList.checkPredMode(sizeId, listId);
508 WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
509 if (predList >= 0)
510 WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
511 else // DPCM Mode
512 codeScalingList(scalingList, sizeId, listId);
513 }
514 }
515 }
516
codeScalingList(const ScalingList & scalingList,uint32_t sizeId,uint32_t listId)517 void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
518 {
519 int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
520 const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
521 int nextCoef = START_VALUE;
522 int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
523 int data;
524
525 if (sizeId > BLOCK_8x8)
526 {
527 WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
528 nextCoef = scalingList.m_scalingListDC[sizeId][listId];
529 }
530 for (int i = 0; i < coefNum; i++)
531 {
532 data = src[scan[i]] - nextCoef;
533 if (data < -128)
534 data += 256;
535 if (data > 127)
536 data -= 256;
537 nextCoef = (nextCoef + data + 256) % 256;
538 WRITE_SVLC(data, "scaling_list_delta_coef");
539 }
540 }
541
codeHrdParameters(const HRDInfo & hrd,int maxSubTLayers)542 void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
543 {
544 WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
545 WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
546 WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
547
548 WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
549 WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
550
551 WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
552 WRITE_CODE(hrd.cpbRemovalDelayLength - 1, 5, "au_cpb_removal_delay_length_minus1");
553 WRITE_CODE(hrd.dpbOutputDelayLength - 1, 5, "dpb_output_delay_length_minus1");
554
555 for (int i = 0; i < maxSubTLayers; i++)
556 {
557 WRITE_FLAG(1, "fixed_pic_rate_general_flag");
558 WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
559 WRITE_UVLC(0, "cpb_cnt_minus1");
560
561 WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
562 WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
563 WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
564 }
565 }
566
codeAUD(const Slice & slice)567 void Entropy::codeAUD(const Slice& slice)
568 {
569 int picType;
570
571 switch (slice.m_sliceType)
572 {
573 case I_SLICE:
574 picType = 0;
575 break;
576 case P_SLICE:
577 picType = 1;
578 break;
579 case B_SLICE:
580 picType = 2;
581 break;
582 default:
583 picType = 7;
584 break;
585 }
586
587 WRITE_CODE(picType, 3, "pic_type");
588 }
589
codeSliceHeader(const Slice & slice,FrameData & encData,uint32_t slice_addr,uint32_t slice_addr_bits,int sliceQp)590 void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp)
591 {
592 WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
593 if (slice.getRapPicFlag())
594 WRITE_FLAG(0, "no_output_of_prior_pics_flag");
595
596 WRITE_UVLC(0, "slice_pic_parameter_set_id");
597
598 /* x265 does not use dependent slices, so always write all this data */
599 if (slice_addr)
600 {
601 // if( dependent_slice_segments_enabled_flag )
602 // dependent_slice_segment_flag u(1)
603 WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
604 }
605
606 WRITE_UVLC(slice.m_sliceType, "slice_type");
607
608 if (!slice.getIdrPicFlag())
609 {
610 int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
611 WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
612
613 #if _DEBUG || CHECKED_BUILD
614 // check for bitstream restriction stating that:
615 // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
616 // Ideally this process should not be repeated for each slice in a picture
617 if (slice.isIRAP())
618 for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
619 {
620 X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
621 }
622 #endif
623
624 if (slice.m_rpsIdx < 0)
625 {
626 WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
627 codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
628 }
629 else
630 {
631 WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
632 int numBits = 0;
633 while ((1 << numBits) < slice.m_iNumRPSInSPS)
634 numBits++;
635
636 if (numBits > 0)
637 WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
638 }
639
640 if (slice.m_sps->bTemporalMVPEnabled)
641 WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
642 }
643 const SAOParam *saoParam = encData.m_saoParam;
644 if (slice.m_bUseSao)
645 {
646 WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
647 if (encData.m_param->internalCsp != X265_CSP_I400)
648 WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
649 }
650 else if(encData.m_param->selectiveSAO)
651 {
652 WRITE_FLAG(0, "slice_sao_luma_flag");
653 if (encData.m_param->internalCsp != X265_CSP_I400)
654 WRITE_FLAG(0, "slice_sao_chroma_flag");
655 }
656
657 // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
658 // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
659
660 if (!slice.isIntra())
661 {
662 bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
663 WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
664 if (overrideFlag)
665 {
666 WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
667 if (slice.isInterB())
668 WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
669 else
670 {
671 X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
672 }
673 }
674 }
675 else
676 {
677 X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
678 }
679
680 if (slice.isInterB())
681 WRITE_FLAG(0, "mvd_l1_zero_flag");
682
683 if (slice.m_sps->bTemporalMVPEnabled)
684 {
685 if (slice.m_sliceType == B_SLICE)
686 WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
687
688 if (slice.m_sliceType != I_SLICE &&
689 ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
690 (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
691 {
692 WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
693 }
694 }
695 if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
696 codePredWeightTable(slice);
697
698 X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
699 if (!slice.isIntra())
700 WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
701
702 int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
703 WRITE_SVLC(code, "slice_qp_delta");
704
705 if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
706 {
707 WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
708 WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
709 }
710 // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
711 // We didn't support filter across slice board, so disable it now
712
713 if (encData.m_param->maxSlices <= 1)
714 {
715 bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
716 bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
717
718 if (isSAOEnabled || isDBFEnabled)
719 WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
720 }
721 }
722
723 /** write wavefront substreams sizes for the slice header */
codeSliceHeaderWPPEntryPoints(const uint32_t * substreamSizes,uint32_t numSubStreams,uint32_t maxOffset)724 void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
725 {
726 uint32_t offsetLen = 1;
727 while (maxOffset >= (1U << offsetLen))
728 {
729 offsetLen++;
730 X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
731 }
732
733 WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
734 if (numSubStreams > 0)
735 WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
736
737 for (uint32_t i = 0; i < numSubStreams; i++)
738 WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
739 }
740
codeShortTermRefPicSet(const RPS & rps,int idx)741 void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
742 {
743 if (idx > 0)
744 WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
745
746 WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
747 WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
748 int prev = 0;
749 for (int j = 0; j < rps.numberOfNegativePictures; j++)
750 {
751 WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
752 prev = rps.deltaPOC[j];
753 WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
754 }
755
756 prev = 0;
757 for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
758 {
759 WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
760 prev = rps.deltaPOC[j];
761 WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
762 }
763 }
764
encodeCTU(const CUData & ctu,const CUGeom & cuGeom)765 void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
766 {
767 bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
768 encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
769 }
770
771 /* encode a CU block recursively */
encodeCU(const CUData & ctu,const CUGeom & cuGeom,uint32_t absPartIdx,uint32_t depth,bool & bEncodeDQP)772 void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
773 {
774 const Slice* slice = ctu.m_slice;
775
776 int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
777 int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
778
779 if (!cuUnsplitFlag)
780 {
781 uint32_t qNumParts = cuGeom.numPartitions >> 2;
782 if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
783 bEncodeDQP = true;
784 for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
785 {
786 const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
787 if (childGeom.flags & CUGeom::PRESENT)
788 encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
789 }
790 return;
791 }
792
793 if (cuSplitFlag)
794 codeSplitFlag(ctu, absPartIdx, depth);
795
796 if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
797 {
798 uint32_t qNumParts = cuGeom.numPartitions >> 2;
799 if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
800 bEncodeDQP = true;
801 for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
802 {
803 const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
804 encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
805 }
806 return;
807 }
808
809 if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
810 bEncodeDQP = true;
811
812 if (slice->m_pps->bTransquantBypassEnabled)
813 codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
814
815 if (!slice->isIntra())
816 {
817 codeSkipFlag(ctu, absPartIdx);
818 if (ctu.isSkipped(absPartIdx))
819 {
820 codeMergeIndex(ctu, absPartIdx);
821 finishCU(ctu, absPartIdx, depth, bEncodeDQP);
822 return;
823 }
824 codePredMode(ctu.m_predMode[absPartIdx]);
825 }
826
827 codePartSize(ctu, absPartIdx, depth);
828
829 // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
830 codePredInfo(ctu, absPartIdx);
831
832 uint32_t tuDepthRange[2];
833 if (ctu.isIntra(absPartIdx))
834 ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
835 else
836 ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
837
838 // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
839 codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
840
841 // --- write terminating bit ---
842 finishCU(ctu, absPartIdx, depth, bEncodeDQP);
843 }
844
845 /* Return bit count of signaling inter mode */
bitsInterMode(const CUData & cu,uint32_t absPartIdx,uint32_t depth) const846 uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
847 {
848 uint32_t bits;
849 bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
850 bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
851 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
852 switch (partSize)
853 {
854 case SIZE_2Nx2N:
855 bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
856 break;
857
858 case SIZE_2NxN:
859 case SIZE_2NxnU:
860 case SIZE_2NxnD:
861 bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
862 bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
863 if (cu.m_slice->m_sps->maxAMPDepth > depth)
864 {
865 bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
866 if (partSize != SIZE_2NxN)
867 bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
868 }
869 break;
870
871 case SIZE_Nx2N:
872 case SIZE_nLx2N:
873 case SIZE_nRx2N:
874 bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
875 bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
876 if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
877 bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
878 if (cu.m_slice->m_sps->maxAMPDepth > depth)
879 {
880 bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
881 if (partSize != SIZE_Nx2N)
882 bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
883 }
884 break;
885 default:
886 X265_CHECK(0, "invalid CU partition\n");
887 break;
888 }
889
890 return bits;
891 }
892
893 /* finish encoding a cu and handle end-of-slice conditions */
finishCU(const CUData & ctu,uint32_t absPartIdx,uint32_t depth,bool bCodeDQP)894 void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
895 {
896 const Slice* slice = ctu.m_slice;
897 uint32_t realEndAddress = slice->m_endCUAddr;
898 uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
899 X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
900
901 uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
902 uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
903 uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
904 uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
905 bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
906 ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
907
908 if (slice->m_pps->bUseDQP)
909 const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
910
911 if (granularityBoundary)
912 {
913 // Encode slice finish
914 uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
915 if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
916 bTerminateSlice = 1;
917
918 // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
919 if (!bTerminateSlice)
920 encodeBinTrm(0); // end_of_slice_segment_flag
921
922 if (!m_bitIf)
923 resetBits(); // TODO: most likely unnecessary
924 }
925 }
926
encodeTransform(const CUData & cu,uint32_t absPartIdx,uint32_t curDepth,uint32_t log2CurSize,bool & bCodeDQP,const uint32_t depthRange[2])927 void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
928 bool& bCodeDQP, const uint32_t depthRange[2])
929 {
930 const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
931
932 /* in each of these conditions, the subdiv flag is implied and not signaled,
933 * so we have checks to make sure the implied value matches our intentions */
934 if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
935 {
936 X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
937 }
938 else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
939 !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
940 {
941 X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
942 }
943 else if (log2CurSize > depthRange[1])
944 {
945 X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
946 }
947 else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
948 {
949 X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
950 }
951 else
952 {
953 X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
954 codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
955 }
956
957 uint32_t hChromaShift = cu.m_hChromaShift;
958 uint32_t vChromaShift = cu.m_vChromaShift;
959 bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
960 if (!curDepth || !bSmallChroma)
961 {
962 uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
963 if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
964 codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
965 if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
966 codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
967 }
968
969 if (subdiv)
970 {
971 --log2CurSize;
972 ++curDepth;
973
974 uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
975
976 encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
977 encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
978 encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
979 encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
980 return;
981 }
982
983 uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
984
985 if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
986 {
987 X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
988 }
989 else
990 codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
991
992 uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
993 uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
994 uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
995 if (!(cbfY || cbfU || cbfV))
996 return;
997
998 // dQP: only for CTU once
999 if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1000 {
1001 uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1002 uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1003 codeDeltaQP(cu, absPartIdxLT);
1004 bCodeDQP = false;
1005 }
1006
1007 if (cbfY)
1008 {
1009 uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1010 codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1011 if (!(cbfU || cbfV))
1012 return;
1013 }
1014
1015 if (bSmallChroma)
1016 {
1017 if ((absPartIdx & 3) != 3)
1018 return;
1019
1020 const uint32_t log2CurSizeC = 2;
1021 const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1022 const uint32_t curPartNum = 4;
1023 uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1024 for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1025 {
1026 TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1027 const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1028 do
1029 {
1030 if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1031 {
1032 uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1033 codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1034 }
1035 }
1036 while (tuIterator.isNextSection());
1037 }
1038 }
1039 else
1040 {
1041 uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1042 const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1043 uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1044 uint32_t coeffOffsetC = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1045 for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1046 {
1047 TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1048 const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1049 do
1050 {
1051 if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1052 {
1053 uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1054 codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1055 }
1056 }
1057 while (tuIterator.isNextSection());
1058 }
1059 }
1060 }
1061
encodeTransformLuma(const CUData & cu,uint32_t absPartIdx,uint32_t curDepth,uint32_t log2CurSize,bool & bCodeDQP,const uint32_t depthRange[2])1062 void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1063 bool& bCodeDQP, const uint32_t depthRange[2])
1064 {
1065 const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1066
1067 /* in each of these conditions, the subdiv flag is implied and not signaled,
1068 * so we have checks to make sure the implied value matches our intentions */
1069 if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1070 {
1071 X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1072 }
1073 else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1074 !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1075 {
1076 X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1077 }
1078 else if (log2CurSize > depthRange[1])
1079 {
1080 X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1081 }
1082 else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1083 {
1084 X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1085 }
1086 else
1087 {
1088 X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1089 codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1090 }
1091
1092 if (subdiv)
1093 {
1094 --log2CurSize;
1095 ++curDepth;
1096
1097 uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1098
1099 encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1100 encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1101 encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1102 encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1103 return;
1104 }
1105
1106 if (!cu.isIntra(absPartIdx) && !curDepth)
1107 {
1108 X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1109 }
1110 else
1111 codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1112
1113 uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1114
1115 if (!cbfY)
1116 return;
1117
1118 // dQP: only for CTU once
1119 if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1120 {
1121 uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1122 uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1123 codeDeltaQP(cu, absPartIdxLT);
1124 bCodeDQP = false;
1125 }
1126
1127 if (cbfY)
1128 {
1129 uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1130 codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1131 }
1132 }
1133
1134
codePredInfo(const CUData & cu,uint32_t absPartIdx)1135 void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1136 {
1137 if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1138 {
1139 codeIntraDirLumaAng(cu, absPartIdx, true);
1140 if (cu.m_chromaFormat != X265_CSP_I400)
1141 {
1142 uint32_t chromaDirMode[NUM_CHROMA_MODE];
1143 cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1144
1145 codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1146
1147 if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1148 {
1149 uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1150 for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1151 {
1152 absPartIdx += qNumParts;
1153 cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1154 codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1155 }
1156 }
1157 }
1158 }
1159 else // if it is inter mode, encode motion vector and reference index
1160 codePUWise(cu, absPartIdx);
1161 }
1162
1163 /** encode motion information for every PU block */
codePUWise(const CUData & cu,uint32_t absPartIdx)1164 void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1165 {
1166 X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1167 uint32_t numPU = cu.getNumPartInter(absPartIdx);
1168
1169 for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1170 {
1171 codeMergeFlag(cu, subPartIdx);
1172 if (cu.m_mergeFlag[subPartIdx])
1173 codeMergeIndex(cu, subPartIdx);
1174 else
1175 {
1176 if (cu.m_slice->isInterB())
1177 codeInterDir(cu, subPartIdx);
1178
1179 uint32_t interDir = cu.m_interDir[subPartIdx];
1180 for (uint32_t list = 0; list < 2; list++)
1181 {
1182 if (interDir & (1 << list))
1183 {
1184 X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1185
1186 codeRefFrmIdxPU(cu, subPartIdx, list);
1187 codeMvd(cu, subPartIdx, list);
1188 codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1189 }
1190 }
1191 }
1192 }
1193 }
1194
1195 /** encode reference frame index for a PU block */
codeRefFrmIdxPU(const CUData & cu,uint32_t absPartIdx,int list)1196 void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1197 {
1198 X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1199
1200 if (cu.m_slice->m_numRefIdx[list] > 1)
1201 codeRefFrmIdx(cu, absPartIdx, list);
1202 }
1203
codeCoeff(const CUData & cu,uint32_t absPartIdx,bool & bCodeDQP,const uint32_t depthRange[2])1204 void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1205 {
1206 if (!cu.isIntra(absPartIdx))
1207 {
1208 if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1209 codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1210 if (!cu.getQtRootCbf(absPartIdx))
1211 return;
1212 }
1213
1214 uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1215 if (cu.m_chromaFormat == X265_CSP_I400)
1216 encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1217 else
1218 encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1219 }
1220
codeSaoOffset(const SaoCtuParam & ctuParam,int plane)1221 void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1222 {
1223 int typeIdx = ctuParam.typeIdx;
1224
1225 if (plane != 2)
1226 {
1227 encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1228 if (typeIdx >= 0)
1229 encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1230 }
1231
1232 if (typeIdx >= 0)
1233 {
1234 enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1235 if (typeIdx == SAO_BO)
1236 {
1237 for (int i = 0; i < SAO_NUM_OFFSET; i++)
1238 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1239
1240 for (int i = 0; i < SAO_NUM_OFFSET; i++)
1241 if (ctuParam.offset[i] != 0)
1242 encodeBinEP(ctuParam.offset[i] < 0);
1243
1244 encodeBinsEP(ctuParam.bandPos, 5);
1245 }
1246 else // if (typeIdx < SAO_BO)
1247 {
1248 codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1249 codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1250 codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1251 codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1252 if (plane != 2)
1253 encodeBinsEP((uint32_t)(typeIdx), 2);
1254 }
1255 }
1256 }
1257
codeSaoOffsetEO(int * offset,int typeIdx,int plane)1258 void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1259 {
1260 if (plane != 2)
1261 {
1262 encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1263 encodeBinEP(1);
1264 }
1265
1266 enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1267
1268 codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1269 codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1270 codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1271 codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1272 if (plane != 2)
1273 encodeBinsEP((uint32_t)(typeIdx), 2);
1274 }
1275
codeSaoOffsetBO(int * offset,int bandPos,int plane)1276 void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1277 {
1278 if (plane != 2)
1279 {
1280 encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1281 encodeBinEP(0);
1282 }
1283
1284 enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1285
1286 for (int i = 0; i < SAO_NUM_OFFSET; i++)
1287 codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1288
1289 for (int i = 0; i < SAO_NUM_OFFSET; i++)
1290 if (offset[i] != 0)
1291 encodeBinEP(offset[i] < 0);
1292
1293 encodeBinsEP(bandPos, 5);
1294 }
1295
1296 /** initialize context model with respect to QP and initialization value */
sbacInit(int qp,int initValue)1297 uint8_t sbacInit(int qp, int initValue)
1298 {
1299 qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1300
1301 int slope = (initValue >> 4) * 5 - 45;
1302 int offset = ((initValue & 15) << 3) - 16;
1303 int initState = X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1304 uint32_t mpState = (initState >= 64);
1305 uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1306
1307 return (uint8_t)state;
1308 }
1309
initBuffer(uint8_t * contextModel,SliceType sliceType,int qp,uint8_t * ctxModel,int size)1310 static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1311 {
1312 ctxModel += sliceType * size;
1313
1314 for (int n = 0; n < size; n++)
1315 contextModel[n] = sbacInit(qp, ctxModel[n]);
1316 }
1317
resetEntropy(const Slice & slice)1318 void Entropy::resetEntropy(const Slice& slice)
1319 {
1320 int qp = slice.m_sliceQp;
1321 SliceType sliceType = slice.m_sliceType;
1322
1323 initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1324 initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1325 initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1326 initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1327 initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1328 initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1329 initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1330 initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1331 initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1332 initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1333 initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1334 initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1335 initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1336 initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1337 initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1338 initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1339 initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1340 initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1341 initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1342 initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1343 initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1344 initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1345 initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1346 initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1347 initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1348 initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1349 // new structure
1350
1351 start();
1352 }
1353
1354 /* code explicit wp tables */
codePredWeightTable(const Slice & slice)1355 void Entropy::codePredWeightTable(const Slice& slice)
1356 {
1357 const WeightParam *wp;
1358 bool bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1359 bool bDenomCoded = false;
1360 int numRefDirs = slice.m_sliceType == B_SLICE ? 2 : 1;
1361 uint32_t totalSignalledWeightFlags = 0;
1362
1363 if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1364 (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1365 {
1366 for (int list = 0; list < numRefDirs; list++)
1367 {
1368 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1369 {
1370 wp = slice.m_weightPredTable[list][ref];
1371 if (!bDenomCoded)
1372 {
1373 WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1374
1375 if (bChroma)
1376 {
1377 int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1378 WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1379 }
1380 bDenomCoded = true;
1381 }
1382 WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1383 totalSignalledWeightFlags += wp[0].wtPresent;
1384 }
1385
1386 if (bChroma)
1387 {
1388 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1389 {
1390 wp = slice.m_weightPredTable[list][ref];
1391 WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1392 totalSignalledWeightFlags += 2 * wp[1].wtPresent;
1393 }
1394 }
1395
1396 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1397 {
1398 wp = slice.m_weightPredTable[list][ref];
1399 if (wp[0].wtPresent)
1400 {
1401 int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1402 WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1403 WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1404 }
1405
1406 if (bChroma)
1407 {
1408 if (wp[1].wtPresent)
1409 {
1410 for (int plane = 1; plane < 3; plane++)
1411 {
1412 int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1413 WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1414
1415 int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1416 int deltaChroma = (wp[plane].inputOffset - pred);
1417 WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1418 }
1419 }
1420 }
1421 }
1422 }
1423
1424 X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1425 }
1426 }
1427
writeUnaryMaxSymbol(uint32_t symbol,uint8_t * scmModel,int offset,uint32_t maxSymbol)1428 void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1429 {
1430 X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1431
1432 encodeBin(symbol ? 1 : 0, scmModel[0]);
1433
1434 if (!symbol)
1435 return;
1436
1437 bool bCodeLast = (maxSymbol > symbol);
1438
1439 while (--symbol)
1440 encodeBin(1, scmModel[offset]);
1441
1442 if (bCodeLast)
1443 encodeBin(0, scmModel[offset]);
1444 }
1445
writeEpExGolomb(uint32_t symbol,uint32_t count)1446 void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1447 {
1448 uint32_t bins = 0;
1449 int numBins = 0;
1450
1451 while (symbol >= (uint32_t)(1 << count))
1452 {
1453 bins = 2 * bins + 1;
1454 numBins++;
1455 symbol -= 1 << count;
1456 count++;
1457 }
1458
1459 bins = 2 * bins + 0;
1460 numBins++;
1461
1462 bins = (bins << count) | symbol;
1463 numBins += count;
1464
1465 X265_CHECK(numBins <= 32, "numBins too large\n");
1466 encodeBinsEP(bins, numBins);
1467 }
1468
1469 /** Coding of coeff_abs_level_minus3 */
writeCoefRemainExGolomb(uint32_t codeNumber,uint32_t absGoRice)1470 void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1471 {
1472 uint32_t length;
1473 const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1474
1475 if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1476 {
1477 length = codeNumber >> absGoRice;
1478
1479 X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1480 X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1481 encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1482 }
1483 else
1484 {
1485 length = 0;
1486 codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1487 {
1488 unsigned long idx;
1489 CLZ(idx, codeNumber + 1);
1490 length = idx;
1491 X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1492 codeNumber -= (1 << idx) - 1;
1493 }
1494 codeNumber = (codeNumber << absGoRice) + codeRemain;
1495
1496 encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1497 encodeBinsEP(codeNumber, length + absGoRice);
1498 }
1499 }
1500
1501 // SBAC RD
loadIntraDirModeLuma(const Entropy & src)1502 void Entropy::loadIntraDirModeLuma(const Entropy& src)
1503 {
1504 X265_CHECK(src.m_valid, "invalid copy source context\n");
1505 m_fracBits = src.m_fracBits;
1506 m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1507 }
1508
copyFrom(const Entropy & src)1509 void Entropy::copyFrom(const Entropy& src)
1510 {
1511 X265_CHECK(src.m_valid, "invalid copy source context\n");
1512
1513 copyState(src);
1514
1515 memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1516 markValid();
1517 }
1518
codePartSize(const CUData & cu,uint32_t absPartIdx,uint32_t depth)1519 void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1520 {
1521 PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1522
1523 if (cu.isIntra(absPartIdx))
1524 {
1525 if (depth == cu.m_encData->m_param->maxCUDepth)
1526 encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1527 return;
1528 }
1529
1530 switch (partSize)
1531 {
1532 case SIZE_2Nx2N:
1533 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1534 break;
1535
1536 case SIZE_2NxN:
1537 case SIZE_2NxnU:
1538 case SIZE_2NxnD:
1539 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1540 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1541 if (cu.m_slice->m_sps->maxAMPDepth > depth)
1542 {
1543 encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1544 if (partSize != SIZE_2NxN)
1545 encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1546 }
1547 break;
1548
1549 case SIZE_Nx2N:
1550 case SIZE_nLx2N:
1551 case SIZE_nRx2N:
1552 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1553 encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1554 if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1555 encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1556 if (cu.m_slice->m_sps->maxAMPDepth > depth)
1557 {
1558 encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1559 if (partSize != SIZE_Nx2N)
1560 encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1561 }
1562 break;
1563 default:
1564 X265_CHECK(0, "invalid CU partition\n");
1565 break;
1566 }
1567 }
1568
codeMergeIndex(const CUData & cu,uint32_t absPartIdx)1569 void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1570 {
1571 uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1572
1573 if (numCand > 1)
1574 {
1575 uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx
1576 encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1577
1578 X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1579
1580 if (unaryIdx != 0)
1581 {
1582 uint32_t mask = (1 << unaryIdx) - 2;
1583 mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1584 encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1585 }
1586 }
1587 }
1588
codeIntraDirLumaAng(const CUData & cu,uint32_t absPartIdx,bool isMultiple)1589 void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1590 {
1591 uint32_t dir[4], j;
1592 uint32_t preds[4][3];
1593 int predIdx[4];
1594 uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
1595 uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1596
1597 for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
1598 {
1599 dir[j] = cu.m_lumaIntraDir[absPartIdx];
1600 cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
1601 predIdx[j] = -1;
1602 for (uint32_t i = 0; i < 3; i++)
1603 if (dir[j] == preds[j][i])
1604 predIdx[j] = i;
1605
1606 encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1607 }
1608
1609 for (j = 0; j < partNum; j++)
1610 {
1611 if (predIdx[j] != -1)
1612 {
1613 X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1614 // NOTE: Mapping
1615 // 0 = 0
1616 // 1 = 10
1617 // 2 = 11
1618 int nonzero = (!!predIdx[j]);
1619 encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1620 }
1621 else
1622 {
1623 if (preds[j][0] > preds[j][1])
1624 std::swap(preds[j][0], preds[j][1]);
1625
1626 if (preds[j][0] > preds[j][2])
1627 std::swap(preds[j][0], preds[j][2]);
1628
1629 if (preds[j][1] > preds[j][2])
1630 std::swap(preds[j][1], preds[j][2]);
1631
1632 dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1633 dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1634 dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1635
1636 encodeBinsEP(dir[j], 5);
1637 }
1638 }
1639 }
1640
codeIntraDirChroma(const CUData & cu,uint32_t absPartIdx,uint32_t * chromaDirMode)1641 void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1642 {
1643 uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1644
1645 if (intraDirChroma == DM_CHROMA_IDX)
1646 encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1647 else
1648 {
1649 for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1650 {
1651 if (intraDirChroma == chromaDirMode[i])
1652 {
1653 intraDirChroma = i;
1654 break;
1655 }
1656 }
1657
1658 encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1659 encodeBinsEP(intraDirChroma, 2);
1660 }
1661 }
1662
codeInterDir(const CUData & cu,uint32_t absPartIdx)1663 void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1664 {
1665 const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1666 const uint32_t ctx = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1667
1668 if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1669 encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1670 if (interDir < 2)
1671 encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1672 }
1673
codeRefFrmIdx(const CUData & cu,uint32_t absPartIdx,int list)1674 void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1675 {
1676 uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1677
1678 encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1679
1680 if (refFrame > 0)
1681 {
1682 uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1683 if (refNum == 0)
1684 return;
1685
1686 refFrame--;
1687 encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1688 if (refFrame > 0)
1689 {
1690 uint32_t mask = (1 << refFrame) - 2;
1691 mask >>= (refFrame == refNum) ? 1 : 0;
1692 encodeBinsEP(mask, refFrame - (refFrame == refNum));
1693 }
1694 }
1695 }
1696
codeMvd(const CUData & cu,uint32_t absPartIdx,int list)1697 void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1698 {
1699 const MV& mvd = cu.m_mvd[list][absPartIdx];
1700 const int hor = mvd.x;
1701 const int ver = mvd.y;
1702
1703 encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1704 encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1705
1706 const bool bHorAbsGr0 = hor != 0;
1707 const bool bVerAbsGr0 = ver != 0;
1708 const uint32_t horAbs = 0 > hor ? -hor : hor;
1709 const uint32_t verAbs = 0 > ver ? -ver : ver;
1710
1711 if (bHorAbsGr0)
1712 encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1713
1714 if (bVerAbsGr0)
1715 encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1716
1717 if (bHorAbsGr0)
1718 {
1719 if (horAbs > 1)
1720 writeEpExGolomb(horAbs - 2, 1);
1721
1722 encodeBinEP(0 > hor ? 1 : 0);
1723 }
1724
1725 if (bVerAbsGr0)
1726 {
1727 if (verAbs > 1)
1728 writeEpExGolomb(verAbs - 2, 1);
1729
1730 encodeBinEP(0 > ver ? 1 : 0);
1731 }
1732 }
1733
codeDeltaQP(const CUData & cu,uint32_t absPartIdx)1734 void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1735 {
1736 int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1737
1738 int qpBdOffsetY = QP_BD_OFFSET;
1739
1740 dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1741
1742 uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp : (-dqp));
1743 uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1744 writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1745 if (absDQp >= CU_DQP_TU_CMAX)
1746 writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1747
1748 if (absDQp > 0)
1749 {
1750 uint32_t sign = (dqp > 0 ? 0 : 1);
1751 encodeBinEP(sign);
1752 }
1753 }
1754
codeQtCbfChroma(const CUData & cu,uint32_t absPartIdx,TextType ttype,uint32_t tuDepth,bool lowestLevel)1755 void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
1756 {
1757 uint32_t ctx = tuDepth + 2;
1758
1759 uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
1760 bool canQuadSplit = (log2TrSize - cu.m_hChromaShift > 2);
1761 uint32_t lowestTUDepth = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1762
1763 if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1764 {
1765 uint32_t subTUDepth = lowestTUDepth + 1; // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1766 // Otherwise, this must be the level above the lowest level (as specified above)
1767 uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
1768
1769 encodeBin(cu.getCbf(absPartIdx , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1770 encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1771 }
1772 else
1773 encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1774 }
1775
#if CHECKED_BUILD || _DEBUG
/* Plain C++ model of the remaining-level cost of one coefficient group.
 *
 * absCoeff    absolute coefficient levels, in coding order
 * numNonZero  number of entries to process (the first entry is always processed)
 *
 * Returns the accumulated bin count.  Only built in checked/debug builds, where
 * codeCoeffNxN compares it against primitives.costCoeffRemain. */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    uint32_t riceParam = 0;
    uint32_t totalBins = 0;
    // base levels packed two bits per coefficient: eight entries of 2, then entries of 1
    uint32_t packedBaseLevels = 0x5555AAAA;
    // stays 1 until a coefficient with level >= 2 has been seen
    int firstCoeff2 = 1;

    int pos = 0;
    do
    {
        const int level = absCoeff[pos];
        const int baseLevel = (packedBaseLevels & 3) | firstCoeff2;
        X265_CHECK(baseLevel == ((pos < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
        packedBaseLevels >>= 2;

        int codeNumber = level - baseLevel;
        if (codeNumber >= 0)
        {
            // mirrors writeCoefRemainExGolomb(level - baseLevel, riceParam)
            uint32_t length = 0;

            codeNumber = ((uint32_t)codeNumber >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (codeNumber >= 0)
            {
                unsigned long bitIdx;
                CLZ(bitIdx, codeNumber + 1);
                length = bitIdx;
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");

                codeNumber = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + codeNumber);

            // raise the Rice parameter after a large level; it stops growing at 4
            if (level > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        if (level >= 2)
            firstCoeff2 = 0;
        pos++;
    }
    while (pos < numNonZero);

    return totalBins;
}
#endif // debug only code
1824
/* Entropy-code the quantized coefficients of one transform block.
 *
 * cu          CU that owns the block; supplies the bypass / transform-skip flags,
 *             the slice and PPS settings and the scan parameters
 * coeff       coefficients of the block, addressed as coeff[y * trSize + x]
 * absPartIdx  partition index used to look up per-partition CU data
 * log2TrSize  log2 of the block width (trSize = 1 << log2TrSize)
 * ttype       texture type; TEXT_LUMA selects the luma context sets
 *
 * The block must hold at least one non-zero coefficient (asserted in checked
 * builds).  The function codes, in this order:
 *   1. the transform-skip flag, when applicable
 *   2. the position of the last significant coefficient
 *   3. for every coefficient group, from the one holding the last coefficient
 *      down to group 0: the group flag, the significance flags, the
 *      greater-than-1 / greater-than-2 flags, the sign bins and the remaining
 *      levels
 *
 * When m_bitIf is NULL no bins are produced for steps that have a "fast RD
 * path"; their cost is added to m_fracBits instead, using the costCoeffNxN,
 * costC1C2Flag and costCoeffRemain primitives. */
void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
{
    uint32_t trSize = 1 << log2TrSize;
    uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
    // compute number of significant coefficients
    uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
    X265_CHECK(numSig > 0, "cbf check fail\n");
    // sign hiding requires the PPS flag and is disabled for transquant-bypass CUs
    bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;

    if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
        codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);

    bool bIsLuma = ttype == TEXT_LUMA;

    // select scans
    TUEntropyCodingParameters codingParameters;
    cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);

    // Per coefficient-group data filled in by primitives.scanPosLast()
    uint8_t coeffNum[MLS_GRP_NUM]; // value range[0, 16]
    uint16_t coeffSign[MLS_GRP_NUM]; // bit mask map for non-zero coeff sign
    uint16_t coeffFlag[MLS_GRP_NUM]; // bit mask map for non-zero coeff

    //----- encode significance map -----

    // Find position of last coefficient
    int scanPosLast = 0;
    uint32_t posLast;
    uint64_t sigCoeffGroupFlag64 = 0; // one bit per coefficient group, indexed by group block position
    //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
    X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");

    scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
    // block position (raster index into coeff) of the last significant coefficient
    posLast = codingParameters.scan[scanPosLast];

    // index, in scan order, of the coefficient group holding the last coefficient
    const int lastScanSet = scanPosLast >> MLS_CG_SIZE;

    // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
    for(int idx = 0; idx < lastScanSet; idx++)
    {
        const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
        const uint8_t nonZero = (coeffNum[idx] != 0);
        sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
    }


    // Code position of last coefficient
    {
        // The last position is composed of a prefix and suffix.
        // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
        // The bypass coded bins for both the x and y components are grouped together.
        uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
        uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
        // swap x and y for the vertical scan
        if (codingParameters.scanType == SCAN_VER)
            std::swap(pos[0], pos[1]);

        int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
        int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
        uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
        X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
        X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");

        // i == 0 codes pos[0] with the X contexts; the Y contexts follow NUM_CTX_LAST_FLAG_XY entries later
        uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
        for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
        {
            // table entry packs the prefix length (low 4 bits) and the suffix length (upper bits)
            uint32_t temp = g_lastCoeffTable[pos[i]];
            uint32_t prefixOnes = temp & 15;
            uint32_t suffixLen = temp >> 4;

            for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
                encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));

            // the terminating zero is omitted when the prefix has its maximum length
            if (prefixOnes < maxGroupIdx)
                encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));

            packedSuffixBits <<= suffixLen;
            packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
            packedSuffixLen += suffixLen;
        }

        encodeBinsEP(packedSuffixBits, packedSuffixLen);
    }

    // code significance flag
    uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
    uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
    // context index for the next greater-than-1 flag; carried from one group to the next through ctxSet
    uint32_t c1 = 1;
    // scan offset, inside the current group, of the next coefficient whose significance is coded;
    // starts just below the last significant coefficient, whose significance is implied
    int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
    ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]); // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
    // number of absCoeff entries collected for the current group; the last coefficient is pre-loaded below
    uint32_t numNonZero = 1;
    unsigned long lastNZPosInCG;
    unsigned long firstNZPosInCG;

#if _DEBUG
    // Unnecessary, for Valgrind-3.10.0 only
    memset(absCoeff, 0, sizeof(absCoeff));
#endif

    absCoeff[0] = (uint16_t)abs(coeff[posLast]);

    // Walk the coefficient groups from the one holding the last coefficient down to group 0
    for (int subSet = lastScanSet; subSet >= 0; subSet--)
    {
        const uint32_t subCoeffFlag = coeffFlag[subSet];
        uint32_t scanFlagMask = subCoeffFlag;
        int subPosBase = subSet << MLS_CG_SIZE;

        if (subSet == lastScanSet)
        {
            X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
            // drop the bit of the last significant coefficient, which needs no significance flag
            scanFlagMask >>= 1;
        }

        // encode significant_coeffgroup_flag
        const int cgBlkPos = codingParameters.scanCG[subSet];
        const int cgPosY = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
        const int cgPosX = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
        const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);

        // the group holding the last coefficient and group 0 are marked significant without coding a flag
        if (subSet == lastScanSet || !subSet)
            sigCoeffGroupFlag64 |= cgBlkPosMask;
        else
        {
            uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
            uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
            encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
        }

        // encode significant_coeff_flag
        if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
        {
            X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
            const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
            const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;

            // Significance context increments, indexed by the position inside the group.
            // Rows 0-3 are selected by patternSigCtx; row 4 is used for 4x4 blocks.
            // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
            static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
            {
                // patternSigCtx = 0
                {
                    2, 1, 1, 0,
                    1, 1, 0, 0,
                    1, 0, 0, 0,
                    0, 0, 0, 0,
                },
                // patternSigCtx = 1
                {
                    2, 2, 2, 2,
                    1, 1, 1, 1,
                    0, 0, 0, 0,
                    0, 0, 0, 0,
                },
                // patternSigCtx = 2
                {
                    2, 1, 0, 0,
                    2, 1, 0, 0,
                    2, 1, 0, 0,
                    2, 1, 0, 0,
                },
                // patternSigCtx = 3
                {
                    2, 2, 2, 2,
                    2, 2, 2, 2,
                    2, 2, 2, 2,
                    2, 2, 2, 2,
                },
                // 4x4
                {
                    0, 1, 4, 5,
                    2, 3, 4, 5,
                    6, 6, 8, 8,
                    7, 7, 8, 8
                }
            };

            const int offset = codingParameters.firstSignificanceMapContext;
            // block position of the first scan entry of this group
            const uint32_t blkPosBase = codingParameters.scan[subPosBase];

            X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
            if (m_bitIf)
            {
                ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);

                // gather the absolute levels of the group into a compact array
                // TODO: accelerate by PABSW
                for (int i = 0; i < MLS_CG_SIZE; i++)
                {
                    tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
                    tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
                    tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
                    tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
                }

                if (log2TrSize == 2)
                {
                    // 4x4 block: the context comes straight from row 4 of table_cnt
                    do
                    {
                        uint32_t blkPos, sig, ctxSig;
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
                        sig = scanFlagMask & 1;
                        scanFlagMask >>= 1;
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
                        {
                            ctxSig = table_cnt[4][blkPos];
                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
                            encodeBin(sig, baseCtx[ctxSig]);
                        }
                        // the slot is overwritten by the next coefficient unless this one is significant
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
                        numNonZero += sig;
                        scanPosSigOff--;
                    }
                    while(scanPosSigOff >= 0);
                }
                else
                {
                    X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");

                    const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
                    do
                    {
                        uint32_t blkPos, sig, ctxSig;
                        blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
                        // forces context 0 for the very first scan position of the block
                        const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
                        sig = scanFlagMask & 1;
                        scanFlagMask >>= 1;
                        X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
                        // no flag is coded for position 0 of a group other than group 0 when no
                        // other coefficient of the group was significant
                        if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
                        {
                            const uint32_t cnt = tabSigCtx[blkPos] + offset;
                            ctxSig = (cnt + posOffset) & posZeroMask;

                            X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
                            encodeBin(sig, baseCtx[ctxSig]);
                        }
                        absCoeff[numNonZero] = tmpCoeff[blkPos];
                        numNonZero += sig;
                        scanPosSigOff--;
                    }
                    while(scanPosSigOff >= 0);
                }
            }
            else // fast RD path
            {
                // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
                const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
                X265_CHECK(numNonZero <= 1, "numNonZero check failure");
                uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);

#if CHECKED_BUILD || _DEBUG
                // keeps the coeffNum consistency check below valid on this path
                numNonZero = coeffNum[subSet];
#endif
                // update RD cost
                m_fracBits += sum;
            } // end of fast RD path -- !m_bitIf
        }
        X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");

        uint32_t coeffSigns = coeffSign[subSet];
        numNonZero = coeffNum[subSet];
        if (numNonZero > 0)
        {
            uint32_t idx;
            X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
            CLZ(lastNZPosInCG, subCoeffFlag);
            CTZ(firstNZPosInCG, subCoeffFlag);

            // a sign is hidden only when the non-zero coefficients of the group are spread far enough apart
            bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
            // context set: 2 for luma groups other than group 0, plus 1 when c1 ended the previous group at 0
            const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
            X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");

            c1 = 1;
            uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];

            // only the first C1FLAG_NUMBER coefficients of a group get a greater-than-1 flag
            uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
            X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");

            if (!m_bitIf)
            {
                // Fast RD path: the primitive returns a packed word that is unpacked below:
                // bits 0-23 = cost, bits 26-27 = final c1, bits 28-31 = firstC2Idx
                uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
                uint32_t firstC2Idx = (sum >> 28);
                c1 = ((sum >> 26) & 3);
                m_fracBits += sum & 0x00FFFFFF;

                // one bypass bin per sign, minus one when a sign is hidden (hiddenShift is 0 or -1 here)
                const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
                //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
                m_fracBits += (numNonZero + hiddenShift) << 15;

                if (numNonZero > firstC2Idx)
                {
                    sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
                    X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
                    m_fracBits += ((uint64_t)sum << 15);
                }
            }
            // Standard path
            else
            {
                // index of the first coefficient with level > 1; keeps the value 8 when there is none
                uint32_t firstC2Idx = 8;
                // level > 2 flag of that coefficient; keeps the value 2 when there is none
                uint32_t firstC2Flag = 2;
                // successive c1 values packed two bits each (2, 3, 3, ...); cleared once a level > 1 is seen
                uint32_t c1Next = 0xFFFFFFFE;

                idx = 0;
                do
                {
                    const uint32_t symbol1 = absCoeff[idx] > 1;
                    const uint32_t symbol2 = absCoeff[idx] > 2;
                    encodeBin(symbol1, baseCtxMod[c1]);

                    if (symbol1)
                        c1Next = 0;

                    // both are latched by the first coefficient with symbol1 set
                    firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
                    firstC2Idx = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;

                    c1 = (c1Next & 3);
                    c1Next >>= 2;
                    X265_CHECK(c1 <= 3, "c1 check failure\n");
                    idx++;
                }
                while(idx < numC1Flag);

                // c1 == 0 means a level > 1 was found: code its single greater-than-2 flag
                if (!c1)
                {
                    baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];

                    X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
                    encodeBin(firstC2Flag, baseCtxMod[0]);
                }

                // sign bins, skipping the lowest bit of coeffSigns when a sign is hidden
                const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
                encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);

                if (!c1 || numNonZero > C1FLAG_NUMBER)
                {
                    // Standard path
                    uint32_t goRiceParam = 0;
                    // 3 for the coefficient that carried the greater-than-2 flag, 2 for the other
                    // flagged coefficients, 1 for coefficients past the first C1FLAG_NUMBER
                    int baseLevel = 3;
                    uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
#if CHECKED_BUILD || _DEBUG
                    int firstCoeff2 = 1;
#endif
                    idx = firstC2Idx;
                    do
                    {
                        if (idx >= C1FLAG_NUMBER)
                            baseLevel = 1;
                        // TODO: fast algorithm maybe broken this check logic
                        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");

                        if (absCoeff[idx] >= baseLevel)
                        {
                            writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
                            X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
                            // bump the Rice parameter (up to 4) after a level above the threshold,
                            // doubling the threshold along with it
                            const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
                            goRiceParam += adjust;
                            threshold += (adjust) ? threshold : 0;
                            X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
                        }
#if CHECKED_BUILD || _DEBUG
                        firstCoeff2 = 0;
#endif
                        baseLevel = 2;
                        idx++;
                    }
                    while(idx < numNonZero);
                }
            } // end of !bitIf
        } // end of (numNonZero > 0)

        // Initialize value for next loop
        numNonZero = 0;
        scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
    }
}
2197
codeSaoMaxUvlc(uint32_t code,uint32_t maxSymbol)2198 void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2199 {
2200 X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2201
2202 uint32_t isCodeNonZero = !!code;
2203
2204 encodeBinEP(isCodeNonZero);
2205 if (isCodeNonZero)
2206 {
2207 uint32_t isCodeLast = (maxSymbol > code);
2208 uint32_t mask = (1 << (code - 1)) - 1;
2209 uint32_t len = code - 1 + isCodeLast;
2210 mask <<= isCodeLast;
2211
2212 encodeBinsEP(mask, len);
2213 }
2214 }
2215
/* Estimate bit costs for CBP, significant map and significant coefficients.
 *
 * estBitsSbac  table set that receives the estimates
 * log2TrSize   log2 of the transform size, forwarded to the significance map estimate
 * bIsLuma      forwarded to the helpers that take a luma/chroma selector
 *
 * Delegates to the four est*Bit() helpers below, in the order listed. */
void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
{
    // coded block flag costs
    estCBFBit(estBitsSbac);

    // coefficient group flag costs
    estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);

    // encode significance map
    estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);

    // encode significant coefficients
    estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
}
2229
2230 /* estimate bit cost for each CBP bit */
estCBFBit(EstBitsSbac & estBitsSbac) const2231 void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2232 {
2233 const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2234
2235 for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2236 {
2237 estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2238 estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2239 }
2240
2241 ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2242
2243 estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2244 estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2245 }
2246
2247 /* estimate SAMBAC bit cost for significant coefficient group map */
estSignificantCoeffGroupMapBit(EstBitsSbac & estBitsSbac,bool bIsLuma) const2248 void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2249 {
2250 int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2251
2252 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2253 for (uint32_t bin = 0; bin < 2; bin++)
2254 estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2255 }
2256
2257 /* estimate SAMBAC bit cost for significant coefficient map */
estSignificantMapBit(EstBitsSbac & estBitsSbac,uint32_t log2TrSize,bool bIsLuma) const2258 void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2259 {
2260 int firstCtx = 1, numCtx = 8;
2261
2262 if (log2TrSize >= 4)
2263 {
2264 firstCtx = bIsLuma ? 21 : 12;
2265 numCtx = bIsLuma ? 6 : 3;
2266 }
2267 else if (log2TrSize == 3)
2268 {
2269 firstCtx = 9;
2270 numCtx = bIsLuma ? 12 : 3;
2271 }
2272
2273 const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);
2274
2275 estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
2276 estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);
2277
2278 for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2279 {
2280 estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
2281 estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
2282 }
2283
2284 const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
2285 if (bIsLuma)
2286 {
2287 if (log2TrSize == 2)
2288 {
2289 for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2290 {
2291 int bits = 0;
2292 const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2293
2294 for (uint32_t ctx = 0; ctx < 3; ctx++)
2295 {
2296 estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
2297 bits += sbacGetEntropyBits(ctxState[ctx], 1);
2298 }
2299
2300 estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2301 }
2302 }
2303 else
2304 {
2305 const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));
2306
2307 for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2308 {
2309 int bits = 0;
2310 const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2311 X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");
2312
2313 for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
2314 {
2315 const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
2316 const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
2317 estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
2318 estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
2319 bits += 2 * cost1;
2320 }
2321 // correct latest bit cost, it didn't include cost0
2322 estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
2323 }
2324 }
2325 }
2326 else
2327 {
2328 const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
2329 const int ctxShift = log2TrSize - 2;
2330
2331 for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
2332 {
2333 int bits = 0;
2334 const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
2335
2336 for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
2337 {
2338 int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
2339 estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
2340 bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
2341 }
2342
2343 estBitsSbac.lastBits[i][maxGroupIdx] = bits;
2344 }
2345 }
2346 }
2347
2348 /* estimate bit cost of significant coefficient */
estSignificantCoefficientsBit(EstBitsSbac & estBitsSbac,bool bIsLuma) const2349 void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2350 {
2351 if (bIsLuma)
2352 {
2353 const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2354 const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2355
2356 for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2357 {
2358 estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2359 estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2360 }
2361
2362 for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2363 {
2364 estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2365 estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2366 }
2367 }
2368 else
2369 {
2370 const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2371 const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2372
2373 for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2374 {
2375 estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2376 estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2377 }
2378
2379 for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2380 {
2381 estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2382 estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2383 }
2384 }
2385 }
2386
2387 /* Initialize our context information from the nominated source */
copyContextsFrom(const Entropy & src)2388 void Entropy::copyContextsFrom(const Entropy& src)
2389 {
2390 X265_CHECK(src.m_valid, "invalid copy source context\n");
2391
2392 memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2393 markValid();
2394 }
2395
start()2396 void Entropy::start()
2397 {
2398 m_low = 0;
2399 m_range = 510;
2400 m_bitsLeft = -12;
2401 m_numBufferedBytes = 0;
2402 m_bufferedByte = 0xff;
2403 }
2404
finish()2405 void Entropy::finish()
2406 {
2407 if (m_low >> (21 + m_bitsLeft))
2408 {
2409 m_bitIf->writeByte(m_bufferedByte + 1);
2410 while (m_numBufferedBytes > 1)
2411 {
2412 m_bitIf->writeByte(0x00);
2413 m_numBufferedBytes--;
2414 }
2415
2416 m_low -= 1 << (21 + m_bitsLeft);
2417 }
2418 else
2419 {
2420 if (m_numBufferedBytes > 0)
2421 m_bitIf->writeByte(m_bufferedByte);
2422
2423 while (m_numBufferedBytes > 1)
2424 {
2425 m_bitIf->writeByte(0xff);
2426 m_numBufferedBytes--;
2427 }
2428 }
2429 m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
2430 }
2431
copyState(const Entropy & other)2432 void Entropy::copyState(const Entropy& other)
2433 {
2434 m_low = other.m_low;
2435 m_range = other.m_range;
2436 m_bitsLeft = other.m_bitsLeft;
2437 m_bufferedByte = other.m_bufferedByte;
2438 m_numBufferedBytes = other.m_numBufferedBytes;
2439 m_fracBits = other.m_fracBits;
2440 }
2441
resetBits()2442 void Entropy::resetBits()
2443 {
2444 m_low = 0;
2445 m_bitsLeft = -12;
2446 m_numBufferedBytes = 0;
2447 m_bufferedByte = 0xff;
2448 m_fracBits &= 32767;
2449 if (m_bitIf)
2450 m_bitIf->resetBits();
2451 }
2452
/** Encode one context-coded bin.
 *
 * binValue  bin to code (0 or 1)
 * ctxModel  context state used for the bin; always replaced by its successor state
 *
 * Without an attached bitstream (m_bitIf == NULL) only the estimated cost of the
 * bin is added to m_fracBits.  Otherwise the range/low registers are updated and
 * renormalised, and writeOut() is called once enough bits have accumulated. */
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
{
    uint32_t mstate = ctxModel;

    // the context adapts in both modes; the old state is still used below
    ctxModel = sbacNext(mstate, binValue);

    if (!m_bitIf)
    {
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
        return;
    }

    uint32_t range = m_range;
    uint32_t state = sbacGetState(mstate);
    // LPS sub-range for this state, selected by bits 6-7 of the current range
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
    range -= lps;

    X265_CHECK(lps >= 2, "lps is too small\n");

    // MPS case: one renormalisation shift when the remaining range dropped below 256, none otherwise
    int numBits = (uint32_t)(range - 256) >> 31;
    uint32_t low = m_low;

    // NOTE: MPS must be LOWEST bit in mstate
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
    if ((binValue ^ mstate) & 1)
    {
        // LPS case: the interval becomes the LPS sub-range placed above the MPS part
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
        //numBits = g_renormTable[lps >> 3];
        // NOTE(review): CLZ is a project macro; it appears to yield the index of the
        // highest set bit of lps -- confirm against its definition
        unsigned long idx;
        CLZ(idx, lps);
        X265_CHECK(state != 63 || idx == 1, "state failure\n");

        numBits = 8 - idx;
        if (state >= 63)
            numBits = 6;
        X265_CHECK(numBits <= 6, "numBits failure\n");

        low += range;
        range = lps;
    }
    // renormalise both registers by the same amount
    m_low = (low << numBits);
    m_range = (range << numBits);
    m_bitsLeft += numBits;

    if (m_bitsLeft >= 0)
        writeOut();
}
2501
2502 /** Encode equiprobable bin */
encodeBinEP(uint32_t binValue)2503 void Entropy::encodeBinEP(uint32_t binValue)
2504 {
2505 if (!m_bitIf)
2506 {
2507 m_fracBits += 32768;
2508 return;
2509 }
2510 m_low <<= 1;
2511 if (binValue)
2512 m_low += m_range;
2513 m_bitsLeft++;
2514
2515 if (m_bitsLeft >= 0)
2516 writeOut();
2517 }
2518
2519 /** Encode equiprobable bins */
encodeBinsEP(uint32_t binValues,int numBins)2520 void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
2521 {
2522 if (!m_bitIf)
2523 {
2524 m_fracBits += 32768 * numBins;
2525 return;
2526 }
2527
2528 while (numBins > 8)
2529 {
2530 numBins -= 8;
2531 uint32_t pattern = binValues >> numBins;
2532 m_low <<= 8;
2533 m_low += m_range * pattern;
2534 binValues -= pattern << numBins;
2535 m_bitsLeft += 8;
2536
2537 if (m_bitsLeft >= 0)
2538 writeOut();
2539 }
2540
2541 m_low <<= numBins;
2542 m_low += m_range * binValues;
2543 m_bitsLeft += numBins;
2544
2545 if (m_bitsLeft >= 0)
2546 writeOut();
2547 }
2548
2549 /** Encode terminating bin */
encodeBinTrm(uint32_t binValue)2550 void Entropy::encodeBinTrm(uint32_t binValue)
2551 {
2552 if (!m_bitIf)
2553 {
2554 m_fracBits += sbacGetEntropyBitsTrm(binValue);
2555 return;
2556 }
2557
2558 m_range -= 2;
2559 if (binValue)
2560 {
2561 m_low += m_range;
2562 m_low <<= 7;
2563 m_range = 2 << 7;
2564 m_bitsLeft += 7;
2565 }
2566 else if (m_range >= 256)
2567 return;
2568 else
2569 {
2570 m_low <<= 1;
2571 m_range <<= 1;
2572 m_bitsLeft++;
2573 }
2574
2575 if (m_bitsLeft >= 0)
2576 writeOut();
2577 }
2578
2579 /** Move bits from register into bitstream */
writeOut()2580 void Entropy::writeOut()
2581 {
2582 uint32_t leadByte = m_low >> (13 + m_bitsLeft);
2583 uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);
2584
2585 m_bitsLeft -= 8;
2586 m_low &= low_mask;
2587
2588 if (leadByte == 0xff)
2589 m_numBufferedBytes++;
2590 else
2591 {
2592 uint32_t numBufferedBytes = m_numBufferedBytes;
2593 if (numBufferedBytes > 0)
2594 {
2595 uint32_t carry = leadByte >> 8;
2596 uint32_t byteTowrite = m_bufferedByte + carry;
2597 m_bitIf->writeByte(byteTowrite);
2598
2599 byteTowrite = (0xff + carry) & 0xff;
2600 while (numBufferedBytes > 1)
2601 {
2602 m_bitIf->writeByte(byteTowrite);
2603 numBufferedBytes--;
2604 }
2605 }
2606 m_numBufferedBytes = 1;
2607 m_bufferedByte = (uint8_t)leadByte;
2608 }
2609 }
2610
/* Bin cost table with 128 entries; the largest entry needs 18 bits.
 * NOTE(review): presumably read through sbacGetEntropyBits(), with entries
 * alternating between the two bin outcomes of a state, and expressed in the
 * same fixed-point unit as the 32768 that encodeBinEP() charges for one bypass
 * bin -- confirm against the definition of sbacGetEntropyBits(). */
const uint32_t g_entropyBits[128] =
{
    // Corrected table, most notably for last state
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
};
2623
/* State transition table: 128 rows with two successor values each.
 *
 * NOTE(review): presumably g_nextState[s][b] is the context state that
 * follows state s after coding bin value b, with s packing
 * (probability state << 1) | MPS -- confirm against the code that indexes it.
 *
 * Rows 126 and 127 map to themselves for either column, i.e. those two
 * states are never left.
 *
 * The top byte of each PFX(entropyStateBits) entry is derived from this
 * table, so the two have to be changed together. */
const uint8_t g_nextState[128][2] =
{
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
};
2643
2644 }
2645
// Packed state/cost table, declared extern "C" so the symbol name is not
// C++-mangled.
// NOTE(review): presumably read by assembly or other optimized primitives -- confirm.
//
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
// Each 32-bit entry i is split into two fields:
//   bits 31..24 : next-state value; equals g_nextState[i & ~1][i & 1] with
//                 bit 0 cleared
//   bits 23..0  : bit cost; equals g_entropyBits[i]
// Even entries are the [stateMPS BitCost] half of a pair and odd entries the
// [stateLPS BitCost] half. Any change to g_entropyBits or g_nextState has to
// be mirrored here.
extern "C" const uint32_t PFX(entropyStateBits)[128] =
{
    // Corrected table, most notably for last state
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
};
2667
2668