1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Steve Borho <steve@borho.org>
5 *          Min Chen <chenm003@163.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24 
25 #include "common.h"
26 #include "framedata.h"
27 #include "scalinglist.h"
28 #include "quant.h"
29 #include "contexts.h"
30 #include "picyuv.h"
31 
32 #include "sao.h"
33 #include "entropy.h"
34 
#define CU_DQP_TU_CMAX 5 /* maximum number of bins for the truncated-unary code */
#define CU_DQP_EG_k    0 /* order of the exp-golomb code */
#define START_VALUE    8 /* starting predictor value for DPCM mode (scaling list deltas) */
38 
39 namespace X265_NS {
40 
41 // initial probability for cu_transquant_bypass flag
42 static const uint8_t INIT_CU_TRANSQUANT_BYPASS_FLAG[3][NUM_TQUANT_BYPASS_FLAG_CTX] =
43 {
44     { 154 },
45     { 154 },
46     { 154 },
47 };
48 
49 // initial probability for split flag
50 static const uint8_t INIT_SPLIT_FLAG[3][NUM_SPLIT_FLAG_CTX] =
51 {
52     { 107,  139,  126, },
53     { 107,  139,  126, },
54     { 139,  141,  157, },
55 };
56 
57 static const uint8_t INIT_SKIP_FLAG[3][NUM_SKIP_FLAG_CTX] =
58 {
59     { 197,  185,  201, },
60     { 197,  185,  201, },
61     { CNU,  CNU,  CNU, },
62 };
63 
64 static const uint8_t INIT_MERGE_FLAG_EXT[3][NUM_MERGE_FLAG_EXT_CTX] =
65 {
66     { 154, },
67     { 110, },
68     { CNU, },
69 };
70 
71 static const uint8_t INIT_MERGE_IDX_EXT[3][NUM_MERGE_IDX_EXT_CTX] =
72 {
73     { 137, },
74     { 122, },
75     { CNU, },
76 };
77 
78 static const uint8_t INIT_PART_SIZE[3][NUM_PART_SIZE_CTX] =
79 {
80     { 154,  139,  154, 154 },
81     { 154,  139,  154, 154 },
82     { 184,  CNU,  CNU, CNU },
83 };
84 
85 static const uint8_t INIT_PRED_MODE[3][NUM_PRED_MODE_CTX] =
86 {
87     { 134, },
88     { 149, },
89     { CNU, },
90 };
91 
92 static const uint8_t INIT_INTRA_PRED_MODE[3][NUM_ADI_CTX] =
93 {
94     { 183, },
95     { 154, },
96     { 184, },
97 };
98 
99 static const uint8_t INIT_CHROMA_PRED_MODE[3][NUM_CHROMA_PRED_CTX] =
100 {
101     { 152,  139, },
102     { 152,  139, },
103     {  63,  139, },
104 };
105 
106 static const uint8_t INIT_INTER_DIR[3][NUM_INTER_DIR_CTX] =
107 {
108     {  95,   79,   63,   31,  31, },
109     {  95,   79,   63,   31,  31, },
110     { CNU,  CNU,  CNU,  CNU, CNU, },
111 };
112 
113 static const uint8_t INIT_MVD[3][NUM_MV_RES_CTX] =
114 {
115     { 169,  198, },
116     { 140,  198, },
117     { CNU,  CNU, },
118 };
119 
120 static const uint8_t INIT_REF_PIC[3][NUM_REF_NO_CTX] =
121 {
122     { 153,  153 },
123     { 153,  153 },
124     { CNU,  CNU },
125 };
126 
127 static const uint8_t INIT_DQP[3][NUM_DELTA_QP_CTX] =
128 {
129     { 154,  154,  154, },
130     { 154,  154,  154, },
131     { 154,  154,  154, },
132 };
133 
134 static const uint8_t INIT_QT_CBF[3][NUM_QT_CBF_CTX] =
135 {
136     { 153,  111,  149,   92,  167,  154,  154 },
137     { 153,  111,  149,  107,  167,  154,  154 },
138     { 111,  141,   94,  138,  182,  154,  154 },
139 };
140 
141 static const uint8_t INIT_QT_ROOT_CBF[3][NUM_QT_ROOT_CBF_CTX] =
142 {
143     {  79, },
144     {  79, },
145     { CNU, },
146 };
147 
148 static const uint8_t INIT_LAST[3][NUM_CTX_LAST_FLAG_XY] =
149 {
150     { 125,  110,  124,  110,   95,   94,  125,  111,  111,   79,  125,  126,  111,  111,   79,
151       108,  123,   93 },
152     { 125,  110,   94,  110,   95,   79,  125,  111,  110,   78,  110,  111,  111,   95,   94,
153       108,  123,  108 },
154     { 110,  110,  124,  125,  140,  153,  125,  127,  140,  109,  111,  143,  127,  111,   79,
155       108,  123,   63 },
156 };
157 
158 static const uint8_t INIT_SIG_CG_FLAG[3][2 * NUM_SIG_CG_FLAG_CTX] =
159 {
160     { 121,  140,
161       61,  154, },
162     { 121,  140,
163       61,  154, },
164     {  91,  171,
165        134,  141, },
166 };
167 
168 static const uint8_t INIT_SIG_FLAG[3][NUM_SIG_FLAG_CTX] =
169 {
170     { 170,  154,  139,  153,  139,  123,  123,   63,  124,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  138,  138,  122,  121,  122,  121,  167,  151,  183,  140,  151,  183,  140,  },
171     { 155,  154,  139,  153,  139,  123,  123,   63,  153,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  166,  183,  140,  136,  153,  154,  170,  153,  123,  123,  107,  121,  107,  121,  167,  151,  183,  140,  151,  183,  140,  },
172     { 111,  111,  125,  110,  110,   94,  124,  108,  124,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  107,  125,  141,  179,  153,  125,  140,  139,  182,  182,  152,  136,  152,  136,  153,  136,  139,  111,  136,  139,  111,  },
173 };
174 
175 static const uint8_t INIT_ONE_FLAG[3][NUM_ONE_FLAG_CTX] =
176 {
177     { 154,  196,  167,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  122,  169,  208,  166,  167,  154,  152,  167,  182, },
178     { 154,  196,  196,  167,  154,  152,  167,  182,  182,  134,  149,  136,  153,  121,  136,  137,  169,  194,  166,  167,  154,  167,  137,  182, },
179     { 140,   92,  137,  138,  140,  152,  138,  139,  153,   74,  149,   92,  139,  107,  122,  152,  140,  179,  166,  182,  140,  227,  122,  197, },
180 };
181 
182 static const uint8_t INIT_ABS_FLAG[3][NUM_ABS_FLAG_CTX] =
183 {
184     { 107,  167,   91,  107,  107,  167, },
185     { 107,  167,   91,  122,  107,  167, },
186     { 138,  153,  136,  167,  152,  152, },
187 };
188 
189 static const uint8_t INIT_MVP_IDX[3][NUM_MVP_IDX_CTX] =
190 {
191     { 168 },
192     { 168 },
193     { CNU },
194 };
195 
196 static const uint8_t INIT_SAO_MERGE_FLAG[3][NUM_SAO_MERGE_FLAG_CTX] =
197 {
198     { 153,  },
199     { 153,  },
200     { 153,  },
201 };
202 
203 static const uint8_t INIT_SAO_TYPE_IDX[3][NUM_SAO_TYPE_IDX_CTX] =
204 {
205     { 160, },
206     { 185, },
207     { 200, },
208 };
209 
210 static const uint8_t INIT_TRANS_SUBDIV_FLAG[3][NUM_TRANS_SUBDIV_FLAG_CTX] =
211 {
212     { 224,  167,  122, },
213     { 124,  138,   94, },
214     { 153,  138,  138, },
215 };
216 
217 static const uint8_t INIT_TRANSFORMSKIP_FLAG[3][2 * NUM_TRANSFORMSKIP_FLAG_CTX] =
218 {
219     { 139,  139 },
220     { 139,  139 },
221     { 139,  139 },
222 };
223 
Entropy()224 Entropy::Entropy()
225 {
226     markValid();
227     m_fracBits = 0;
228     m_pad = 0;
229     m_meanQP = 0;
230     X265_CHECK(sizeof(m_contextState) >= sizeof(m_contextState[0]) * MAX_OFF_CTX_MOD, "context state table is too small\n");
231 }
232 
codeVPS(const VPS & vps)233 void Entropy::codeVPS(const VPS& vps)
234 {
235     WRITE_CODE(0,       4, "vps_video_parameter_set_id");
236     WRITE_CODE(3,       2, "vps_reserved_three_2bits");
237     WRITE_CODE(0,       6, "vps_reserved_zero_6bits");
238     WRITE_CODE(vps.maxTempSubLayers - 1, 3, "vps_max_sub_layers_minus1");
239     WRITE_FLAG(vps.maxTempSubLayers == 1,   "vps_temporal_id_nesting_flag");
240     WRITE_CODE(0xffff, 16, "vps_reserved_ffff_16bits");
241 
242     codeProfileTier(vps.ptl, vps.maxTempSubLayers);
243 
244     WRITE_FLAG(true, "vps_sub_layer_ordering_info_present_flag");
245 
246     for (uint32_t i = 0; i < vps.maxTempSubLayers; i++)
247     {
248         WRITE_UVLC(vps.maxDecPicBuffering - 1, "vps_max_dec_pic_buffering_minus1[i]");
249         WRITE_UVLC(vps.numReorderPics,         "vps_num_reorder_pics[i]");
250         WRITE_UVLC(vps.maxLatencyIncrease + 1, "vps_max_latency_increase_plus1[i]");
251     }
252 
253     WRITE_CODE(0, 6, "vps_max_nuh_reserved_zero_layer_id");
254     WRITE_UVLC(0,    "vps_max_op_sets_minus1");
255     WRITE_FLAG(0,    "vps_timing_info_present_flag"); /* we signal timing info in SPS-VUI */
256     WRITE_FLAG(0,    "vps_extension_flag");
257 }
258 
codeSPS(const SPS & sps,const ScalingList & scalingList,const ProfileTierLevel & ptl)259 void Entropy::codeSPS(const SPS& sps, const ScalingList& scalingList, const ProfileTierLevel& ptl)
260 {
261     WRITE_CODE(0, 4, "sps_video_parameter_set_id");
262     WRITE_CODE(sps.maxTempSubLayers - 1, 3, "sps_max_sub_layers_minus1");
263     WRITE_FLAG(sps.maxTempSubLayers == 1,   "sps_temporal_id_nesting_flag");
264 
265     codeProfileTier(ptl, sps.maxTempSubLayers);
266 
267     WRITE_UVLC(0, "sps_seq_parameter_set_id");
268     WRITE_UVLC(sps.chromaFormatIdc, "chroma_format_idc");
269 
270     if (sps.chromaFormatIdc == X265_CSP_I444)
271         WRITE_FLAG(0,                       "separate_colour_plane_flag");
272 
273     WRITE_UVLC(sps.picWidthInLumaSamples,   "pic_width_in_luma_samples");
274     WRITE_UVLC(sps.picHeightInLumaSamples,  "pic_height_in_luma_samples");
275 
276     const Window& conf = sps.conformanceWindow;
277     WRITE_FLAG(conf.bEnabled, "conformance_window_flag");
278     if (conf.bEnabled)
279     {
280         int hShift = CHROMA_H_SHIFT(sps.chromaFormatIdc), vShift = CHROMA_V_SHIFT(sps.chromaFormatIdc);
281         WRITE_UVLC(conf.leftOffset   >> hShift, "conf_win_left_offset");
282         WRITE_UVLC(conf.rightOffset  >> hShift, "conf_win_right_offset");
283         WRITE_UVLC(conf.topOffset    >> vShift, "conf_win_top_offset");
284         WRITE_UVLC(conf.bottomOffset >> vShift, "conf_win_bottom_offset");
285     }
286 
287     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_luma_minus8");
288     WRITE_UVLC(X265_DEPTH - 8,   "bit_depth_chroma_minus8");
289     WRITE_UVLC(sps.log2MaxPocLsb - 4, "log2_max_pic_order_cnt_lsb_minus4");
290     WRITE_FLAG(true,             "sps_sub_layer_ordering_info_present_flag");
291 
292     for (uint32_t i = 0; i < sps.maxTempSubLayers; i++)
293     {
294         WRITE_UVLC(sps.maxDecPicBuffering - 1, "sps_max_dec_pic_buffering_minus1[i]");
295         WRITE_UVLC(sps.numReorderPics,         "sps_num_reorder_pics[i]");
296         WRITE_UVLC(sps.maxLatencyIncrease + 1, "sps_max_latency_increase_plus1[i]");
297     }
298 
299     WRITE_UVLC(sps.log2MinCodingBlockSize - 3,    "log2_min_coding_block_size_minus3");
300     WRITE_UVLC(sps.log2DiffMaxMinCodingBlockSize, "log2_diff_max_min_coding_block_size");
301     WRITE_UVLC(sps.quadtreeTULog2MinSize - 2,     "log2_min_transform_block_size_minus2");
302     WRITE_UVLC(sps.quadtreeTULog2MaxSize - sps.quadtreeTULog2MinSize, "log2_diff_max_min_transform_block_size");
303     WRITE_UVLC(sps.quadtreeTUMaxDepthInter - 1,   "max_transform_hierarchy_depth_inter");
304     WRITE_UVLC(sps.quadtreeTUMaxDepthIntra - 1,   "max_transform_hierarchy_depth_intra");
305     WRITE_FLAG(scalingList.m_bEnabled,            "scaling_list_enabled_flag");
306     if (scalingList.m_bEnabled)
307     {
308         WRITE_FLAG(scalingList.m_bDataPresent,    "sps_scaling_list_data_present_flag");
309         if (scalingList.m_bDataPresent)
310             codeScalingList(scalingList);
311     }
312     WRITE_FLAG(sps.bUseAMP, "amp_enabled_flag");
313     WRITE_FLAG(sps.bUseSAO, "sample_adaptive_offset_enabled_flag");
314 
315     WRITE_FLAG(0, "pcm_enabled_flag");
316     WRITE_UVLC(sps.spsrpsNum, "num_short_term_ref_pic_sets");
317     for (int i = 0; i < sps.spsrpsNum; i++)
318         codeShortTermRefPicSet(sps.spsrps[i], i);
319     WRITE_FLAG(0, "long_term_ref_pics_present_flag");
320 
321     WRITE_FLAG(sps.bTemporalMVPEnabled, "sps_temporal_mvp_enable_flag");
322     WRITE_FLAG(sps.bUseStrongIntraSmoothing, "sps_strong_intra_smoothing_enable_flag");
323 
324     WRITE_FLAG(1, "vui_parameters_present_flag");
325     codeVUI(sps.vuiParameters, sps.maxTempSubLayers, sps.bEmitVUITimingInfo, sps.bEmitVUIHRDInfo);
326 
327     WRITE_FLAG(0, "sps_extension_flag");
328 }
329 
codePPS(const PPS & pps,bool filerAcross,int iPPSInitQpMinus26)330 void Entropy::codePPS( const PPS& pps, bool filerAcross, int iPPSInitQpMinus26 )
331 {
332     WRITE_UVLC(0,                          "pps_pic_parameter_set_id");
333     WRITE_UVLC(0,                          "pps_seq_parameter_set_id");
334     WRITE_FLAG(0,                          "dependent_slice_segments_enabled_flag");
335     WRITE_FLAG(0,                          "output_flag_present_flag");
336     WRITE_CODE(0, 3,                       "num_extra_slice_header_bits");
337     WRITE_FLAG(pps.bSignHideEnabled,       "sign_data_hiding_flag");
338     WRITE_FLAG(0,                          "cabac_init_present_flag");
339     WRITE_UVLC(pps.numRefIdxDefault[0] - 1, "num_ref_idx_l0_default_active_minus1");
340     WRITE_UVLC(pps.numRefIdxDefault[1] - 1, "num_ref_idx_l1_default_active_minus1");
341 
342     WRITE_SVLC(iPPSInitQpMinus26,         "init_qp_minus26");
343     WRITE_FLAG(pps.bConstrainedIntraPred, "constrained_intra_pred_flag");
344     WRITE_FLAG(pps.bTransformSkipEnabled, "transform_skip_enabled_flag");
345 
346     WRITE_FLAG(pps.bUseDQP,                "cu_qp_delta_enabled_flag");
347     if (pps.bUseDQP)
348         WRITE_UVLC(pps.maxCuDQPDepth,      "diff_cu_qp_delta_depth");
349 
350     WRITE_SVLC(pps.chromaQpOffset[0],      "pps_cb_qp_offset");
351     WRITE_SVLC(pps.chromaQpOffset[1],      "pps_cr_qp_offset");
352     WRITE_FLAG(pps.pps_slice_chroma_qp_offsets_present_flag, "pps_slice_chroma_qp_offsets_present_flag");
353 
354     WRITE_FLAG(pps.bUseWeightPred,            "weighted_pred_flag");
355     WRITE_FLAG(pps.bUseWeightedBiPred,        "weighted_bipred_flag");
356     WRITE_FLAG(pps.bTransquantBypassEnabled,  "transquant_bypass_enable_flag");
357     WRITE_FLAG(0,                             "tiles_enabled_flag");
358     WRITE_FLAG(pps.bEntropyCodingSyncEnabled, "entropy_coding_sync_enabled_flag");
359     WRITE_FLAG(filerAcross,                   "loop_filter_across_slices_enabled_flag");
360 
361     WRITE_FLAG(pps.bDeblockingFilterControlPresent, "deblocking_filter_control_present_flag");
362     if (pps.bDeblockingFilterControlPresent)
363     {
364         WRITE_FLAG(0,                               "deblocking_filter_override_enabled_flag");
365         WRITE_FLAG(pps.bPicDisableDeblockingFilter, "pps_disable_deblocking_filter_flag");
366         if (!pps.bPicDisableDeblockingFilter)
367         {
368             WRITE_SVLC(pps.deblockingFilterBetaOffsetDiv2, "pps_beta_offset_div2");
369             WRITE_SVLC(pps.deblockingFilterTcOffsetDiv2,   "pps_tc_offset_div2");
370         }
371     }
372 
373     WRITE_FLAG(0, "pps_scaling_list_data_present_flag");
374     WRITE_FLAG(0, "lists_modification_present_flag");
375     WRITE_UVLC(0, "log2_parallel_merge_level_minus2");
376     WRITE_FLAG(0, "slice_segment_header_extension_present_flag");
377     WRITE_FLAG(0, "pps_extension_flag");
378 }
379 
codeProfileTier(const ProfileTierLevel & ptl,int maxTempSubLayers)380 void Entropy::codeProfileTier(const ProfileTierLevel& ptl, int maxTempSubLayers)
381 {
382     WRITE_CODE(0, 2,                "XXX_profile_space[]");
383     WRITE_FLAG(ptl.tierFlag,        "XXX_tier_flag[]");
384     WRITE_CODE(ptl.profileIdc, 5,   "XXX_profile_idc[]");
385     for (int j = 0; j < 32; j++)
386         WRITE_FLAG(ptl.profileCompatibilityFlag[j], "XXX_profile_compatibility_flag[][j]");
387 
388     WRITE_FLAG(ptl.progressiveSourceFlag,   "general_progressive_source_flag");
389     WRITE_FLAG(ptl.interlacedSourceFlag,    "general_interlaced_source_flag");
390     WRITE_FLAG(ptl.nonPackedConstraintFlag, "general_non_packed_constraint_flag");
391     WRITE_FLAG(ptl.frameOnlyConstraintFlag, "general_frame_only_constraint_flag");
392 
393     if (ptl.profileIdc == Profile::MAINREXT || ptl.profileIdc == Profile::HIGHTHROUGHPUTREXT)
394     {
395         uint32_t bitDepthConstraint = ptl.bitDepthConstraint;
396         int csp = ptl.chromaFormatConstraint;
397         WRITE_FLAG(bitDepthConstraint<=12, "general_max_12bit_constraint_flag");
398         WRITE_FLAG(bitDepthConstraint<=10, "general_max_10bit_constraint_flag");
399         WRITE_FLAG(bitDepthConstraint<= 8 && csp != X265_CSP_I422 , "general_max_8bit_constraint_flag");
400         WRITE_FLAG(csp == X265_CSP_I422 || csp == X265_CSP_I420 || csp == X265_CSP_I400, "general_max_422chroma_constraint_flag");
401         WRITE_FLAG(csp == X265_CSP_I420 || csp == X265_CSP_I400,                         "general_max_420chroma_constraint_flag");
402         WRITE_FLAG(csp == X265_CSP_I400,                                                 "general_max_monochrome_constraint_flag");
403         WRITE_FLAG(ptl.intraConstraintFlag,        "general_intra_constraint_flag");
404         WRITE_FLAG(ptl.onePictureOnlyConstraintFlag,"general_one_picture_only_constraint_flag");
405         WRITE_FLAG(ptl.lowerBitRateConstraintFlag, "general_lower_bit_rate_constraint_flag");
406         WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[0..15]");
407         WRITE_CODE(0 , 16, "XXX_reserved_zero_35bits[16..31]");
408         WRITE_CODE(0 ,  3, "XXX_reserved_zero_35bits[32..34]");
409     }
410     else
411     {
412         WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[0..15]");
413         WRITE_CODE(0, 16, "XXX_reserved_zero_44bits[16..31]");
414         WRITE_CODE(0, 12, "XXX_reserved_zero_44bits[32..43]");
415     }
416 
417     WRITE_CODE(ptl.levelIdc, 8, "general_level_idc");
418 
419     if (maxTempSubLayers > 1)
420     {
421          WRITE_FLAG(0, "sub_layer_profile_present_flag[i]");
422          WRITE_FLAG(0, "sub_layer_level_present_flag[i]");
423          for (int i = maxTempSubLayers - 1; i < 8 ; i++)
424              WRITE_CODE(0, 2, "reserved_zero_2bits");
425     }
426 }
427 
codeVUI(const VUI & vui,int maxSubTLayers,bool bEmitVUITimingInfo,bool bEmitVUIHRDInfo)428 void Entropy::codeVUI(const VUI& vui, int maxSubTLayers, bool bEmitVUITimingInfo, bool bEmitVUIHRDInfo)
429 {
430     WRITE_FLAG(vui.aspectRatioInfoPresentFlag, "aspect_ratio_info_present_flag");
431     if (vui.aspectRatioInfoPresentFlag)
432     {
433         WRITE_CODE(vui.aspectRatioIdc, 8, "aspect_ratio_idc");
434         if (vui.aspectRatioIdc == 255)
435         {
436             WRITE_CODE(vui.sarWidth, 16, "sar_width");
437             WRITE_CODE(vui.sarHeight, 16, "sar_height");
438         }
439     }
440 
441     WRITE_FLAG(vui.overscanInfoPresentFlag, "overscan_info_present_flag");
442     if (vui.overscanInfoPresentFlag)
443         WRITE_FLAG(vui.overscanAppropriateFlag, "overscan_appropriate_flag");
444 
445     WRITE_FLAG(vui.videoSignalTypePresentFlag, "video_signal_type_present_flag");
446     if (vui.videoSignalTypePresentFlag)
447     {
448         WRITE_CODE(vui.videoFormat, 3, "video_format");
449         WRITE_FLAG(vui.videoFullRangeFlag, "video_full_range_flag");
450         WRITE_FLAG(vui.colourDescriptionPresentFlag, "colour_description_present_flag");
451         if (vui.colourDescriptionPresentFlag)
452         {
453             WRITE_CODE(vui.colourPrimaries, 8, "colour_primaries");
454             WRITE_CODE(vui.transferCharacteristics, 8, "transfer_characteristics");
455             WRITE_CODE(vui.matrixCoefficients, 8, "matrix_coefficients");
456         }
457     }
458 
459     WRITE_FLAG(vui.chromaLocInfoPresentFlag, "chroma_loc_info_present_flag");
460     if (vui.chromaLocInfoPresentFlag)
461     {
462         WRITE_UVLC(vui.chromaSampleLocTypeTopField, "chroma_sample_loc_type_top_field");
463         WRITE_UVLC(vui.chromaSampleLocTypeBottomField, "chroma_sample_loc_type_bottom_field");
464     }
465 
466     WRITE_FLAG(0, "neutral_chroma_indication_flag");
467     WRITE_FLAG(vui.fieldSeqFlag, "field_seq_flag");
468     WRITE_FLAG(vui.frameFieldInfoPresentFlag, "frame_field_info_present_flag");
469 
470     WRITE_FLAG(vui.defaultDisplayWindow.bEnabled, "default_display_window_flag");
471     if (vui.defaultDisplayWindow.bEnabled)
472     {
473         WRITE_UVLC(vui.defaultDisplayWindow.leftOffset, "def_disp_win_left_offset");
474         WRITE_UVLC(vui.defaultDisplayWindow.rightOffset, "def_disp_win_right_offset");
475         WRITE_UVLC(vui.defaultDisplayWindow.topOffset, "def_disp_win_top_offset");
476         WRITE_UVLC(vui.defaultDisplayWindow.bottomOffset, "def_disp_win_bottom_offset");
477     }
478 
479     if (!bEmitVUITimingInfo)
480         WRITE_FLAG(0, "vui_timing_info_present_flag");
481     else
482     {
483         WRITE_FLAG(1, "vui_timing_info_present_flag");
484         WRITE_CODE(vui.timingInfo.numUnitsInTick, 32, "vui_num_units_in_tick");
485         WRITE_CODE(vui.timingInfo.timeScale, 32, "vui_time_scale");
486         WRITE_FLAG(0, "vui_poc_proportional_to_timing_flag");
487     }
488 
489     if (!bEmitVUIHRDInfo)
490         WRITE_FLAG(0, "vui_hrd_parameters_present_flag");
491     else
492     {
493         WRITE_FLAG(vui.hrdParametersPresentFlag, "vui_hrd_parameters_present_flag");
494         if (vui.hrdParametersPresentFlag)
495             codeHrdParameters(vui.hrdParameters, maxSubTLayers);
496     }
497 
498     WRITE_FLAG(0, "bitstream_restriction_flag");
499 }
500 
codeScalingList(const ScalingList & scalingList)501 void Entropy::codeScalingList(const ScalingList& scalingList)
502 {
503     for (int sizeId = 0; sizeId < ScalingList::NUM_SIZES; sizeId++)
504     {
505         for (int listId = 0; listId < ScalingList::NUM_LISTS; listId += (sizeId == 3) ? 3 : 1)
506         {
507             int predList = scalingList.checkPredMode(sizeId, listId);
508             WRITE_FLAG(predList < 0, "scaling_list_pred_mode_flag");
509             if (predList >= 0)
510                 WRITE_UVLC(listId - predList, "scaling_list_pred_matrix_id_delta");
511             else // DPCM Mode
512                 codeScalingList(scalingList, sizeId, listId);
513         }
514     }
515 }
516 
codeScalingList(const ScalingList & scalingList,uint32_t sizeId,uint32_t listId)517 void Entropy::codeScalingList(const ScalingList& scalingList, uint32_t sizeId, uint32_t listId)
518 {
519     int coefNum = X265_MIN(ScalingList::MAX_MATRIX_COEF_NUM, (int)ScalingList::s_numCoefPerSize[sizeId]);
520     const uint16_t* scan = (sizeId == 0 ? g_scan4x4[SCAN_DIAG] : g_scan8x8diag);
521     int nextCoef = START_VALUE;
522     int32_t *src = scalingList.m_scalingListCoef[sizeId][listId];
523     int data;
524 
525     if (sizeId > BLOCK_8x8)
526     {
527         WRITE_SVLC(scalingList.m_scalingListDC[sizeId][listId] - 8, "scaling_list_dc_coef_minus8");
528         nextCoef = scalingList.m_scalingListDC[sizeId][listId];
529     }
530     for (int i = 0; i < coefNum; i++)
531     {
532         data = src[scan[i]] - nextCoef;
533         if (data < -128)
534             data += 256;
535         if (data > 127)
536             data -= 256;
537         nextCoef = (nextCoef + data + 256) % 256;
538         WRITE_SVLC(data,  "scaling_list_delta_coef");
539     }
540 }
541 
codeHrdParameters(const HRDInfo & hrd,int maxSubTLayers)542 void Entropy::codeHrdParameters(const HRDInfo& hrd, int maxSubTLayers)
543 {
544     WRITE_FLAG(1, "nal_hrd_parameters_present_flag");
545     WRITE_FLAG(0, "vcl_hrd_parameters_present_flag");
546     WRITE_FLAG(0, "sub_pic_hrd_params_present_flag");
547 
548     WRITE_CODE(hrd.bitRateScale, 4, "bit_rate_scale");
549     WRITE_CODE(hrd.cpbSizeScale, 4, "cpb_size_scale");
550 
551     WRITE_CODE(hrd.initialCpbRemovalDelayLength - 1, 5, "initial_cpb_removal_delay_length_minus1");
552     WRITE_CODE(hrd.cpbRemovalDelayLength - 1,        5, "au_cpb_removal_delay_length_minus1");
553     WRITE_CODE(hrd.dpbOutputDelayLength - 1,         5, "dpb_output_delay_length_minus1");
554 
555     for (int i = 0; i < maxSubTLayers; i++)
556     {
557         WRITE_FLAG(1, "fixed_pic_rate_general_flag");
558         WRITE_UVLC(0, "elemental_duration_in_tc_minus1");
559         WRITE_UVLC(0, "cpb_cnt_minus1");
560 
561         WRITE_UVLC(hrd.bitRateValue - 1, "bit_rate_value_minus1");
562         WRITE_UVLC(hrd.cpbSizeValue - 1, "cpb_size_value_minus1");
563         WRITE_FLAG(hrd.cbrFlag, "cbr_flag");
564     }
565 }
566 
codeAUD(const Slice & slice)567 void Entropy::codeAUD(const Slice& slice)
568 {
569     int picType;
570 
571     switch (slice.m_sliceType)
572     {
573     case I_SLICE:
574         picType = 0;
575         break;
576     case P_SLICE:
577         picType = 1;
578         break;
579     case B_SLICE:
580         picType = 2;
581         break;
582     default:
583         picType = 7;
584         break;
585     }
586 
587     WRITE_CODE(picType, 3, "pic_type");
588 }
589 
codeSliceHeader(const Slice & slice,FrameData & encData,uint32_t slice_addr,uint32_t slice_addr_bits,int sliceQp)590 void Entropy::codeSliceHeader(const Slice& slice, FrameData& encData, uint32_t slice_addr, uint32_t slice_addr_bits, int sliceQp)
591 {
592     WRITE_FLAG((slice_addr == 0 ? 1 : 0), "first_slice_segment_in_pic_flag");
593     if (slice.getRapPicFlag())
594         WRITE_FLAG(0, "no_output_of_prior_pics_flag");
595 
596     WRITE_UVLC(0, "slice_pic_parameter_set_id");
597 
598     /* x265 does not use dependent slices, so always write all this data */
599     if (slice_addr)
600     {
601         // if( dependent_slice_segments_enabled_flag )
602         //     dependent_slice_segment_flag             u(1)
603         WRITE_CODE(slice_addr, slice_addr_bits, "slice_segment_address");
604     }
605 
606     WRITE_UVLC(slice.m_sliceType, "slice_type");
607 
608     if (!slice.getIdrPicFlag())
609     {
610         int picOrderCntLSB = (slice.m_poc - slice.m_lastIDR + (1 << slice.m_sps->log2MaxPocLsb)) % (1 << slice.m_sps->log2MaxPocLsb);
611         WRITE_CODE(picOrderCntLSB, slice.m_sps->log2MaxPocLsb, "pic_order_cnt_lsb");
612 
613 #if _DEBUG || CHECKED_BUILD
614         // check for bitstream restriction stating that:
615         // If the current picture is a BLA or CRA picture, the value of NumPocTotalCurr shall be equal to 0.
616         // Ideally this process should not be repeated for each slice in a picture
617         if (slice.isIRAP())
618             for (int picIdx = 0; picIdx < slice.m_rps.numberOfPictures; picIdx++)
619             {
620                 X265_CHECK(!slice.m_rps.bUsed[picIdx], "pic unused failure\n");
621             }
622 #endif
623 
624         if (slice.m_rpsIdx < 0)
625         {
626             WRITE_FLAG(0, "short_term_ref_pic_set_sps_flag");
627             codeShortTermRefPicSet(slice.m_rps, slice.m_sps->spsrpsNum);
628         }
629         else
630         {
631             WRITE_FLAG(1, "short_term_ref_pic_set_sps_flag");
632             int numBits = 0;
633             while ((1 << numBits) < slice.m_iNumRPSInSPS)
634                 numBits++;
635 
636             if (numBits > 0)
637                 WRITE_CODE(slice.m_rpsIdx, numBits, "short_term_ref_pic_set_idx");
638         }
639 
640         if (slice.m_sps->bTemporalMVPEnabled)
641             WRITE_FLAG(1, "slice_temporal_mvp_enable_flag");
642     }
643     const SAOParam *saoParam = encData.m_saoParam;
644     if (slice.m_bUseSao)
645     {
646         WRITE_FLAG(saoParam->bSaoFlag[0], "slice_sao_luma_flag");
647         if (encData.m_param->internalCsp != X265_CSP_I400)
648             WRITE_FLAG(saoParam->bSaoFlag[1], "slice_sao_chroma_flag");
649     }
650     else if(encData.m_param->selectiveSAO)
651     {
652         WRITE_FLAG(0, "slice_sao_luma_flag");
653         if (encData.m_param->internalCsp != X265_CSP_I400)
654             WRITE_FLAG(0, "slice_sao_chroma_flag");
655     }
656 
657     // check if numRefIdx match the defaults (1, hard-coded in PPS). If not, override
658     // TODO: this might be a place to optimize a few bits per slice, by using param->refs for L0 default
659 
660     if (!slice.isIntra())
661     {
662         bool overrideFlag = (slice.m_numRefIdx[0] != slice.numRefIdxDefault[0] || (slice.isInterB() && slice.m_numRefIdx[1] != slice.numRefIdxDefault[1]));
663         WRITE_FLAG(overrideFlag, "num_ref_idx_active_override_flag");
664         if (overrideFlag)
665         {
666             WRITE_UVLC(slice.m_numRefIdx[0] - 1, "num_ref_idx_l0_active_minus1");
667             if (slice.isInterB())
668                 WRITE_UVLC(slice.m_numRefIdx[1] - 1, "num_ref_idx_l1_active_minus1");
669             else
670             {
671                 X265_CHECK(slice.m_numRefIdx[1] == 0, "expected no L1 references for P slice\n");
672             }
673         }
674     }
675     else
676     {
677         X265_CHECK(!slice.m_numRefIdx[0] && !slice.m_numRefIdx[1], "expected no references for I slice\n");
678     }
679 
680     if (slice.isInterB())
681         WRITE_FLAG(0, "mvd_l1_zero_flag");
682 
683     if (slice.m_sps->bTemporalMVPEnabled)
684     {
685         if (slice.m_sliceType == B_SLICE)
686             WRITE_FLAG(slice.m_colFromL0Flag, "collocated_from_l0_flag");
687 
688         if (slice.m_sliceType != I_SLICE &&
689             ((slice.m_colFromL0Flag && slice.m_numRefIdx[0] > 1) ||
690             (!slice.m_colFromL0Flag && slice.m_numRefIdx[1] > 1)))
691         {
692             WRITE_UVLC(slice.m_colRefIdx, "collocated_ref_idx");
693         }
694     }
695     if ((slice.m_pps->bUseWeightPred && slice.m_sliceType == P_SLICE) || (slice.m_pps->bUseWeightedBiPred && slice.m_sliceType == B_SLICE))
696         codePredWeightTable(slice);
697 
698     X265_CHECK(slice.m_maxNumMergeCand <= MRG_MAX_NUM_CANDS, "too many merge candidates\n");
699     if (!slice.isIntra())
700         WRITE_UVLC(MRG_MAX_NUM_CANDS - slice.m_maxNumMergeCand, "five_minus_max_num_merge_cand");
701 
702     int code = sliceQp - (slice.m_iPPSQpMinus26 + 26);
703     WRITE_SVLC(code, "slice_qp_delta");
704 
705     if (slice.m_pps->pps_slice_chroma_qp_offsets_present_flag)
706     {
707         WRITE_SVLC(slice.m_chromaQpOffset[0], "slice_cb_qp_offset");
708         WRITE_SVLC(slice.m_chromaQpOffset[1], "slice_cr_qp_offset");
709     }
710     // TODO: Enable when pps_loop_filter_across_slices_enabled_flag==1
711     //       We didn't support filter across slice board, so disable it now
712 
713     if (encData.m_param->maxSlices <= 1)
714     {
715         bool isSAOEnabled = slice.m_sps->bUseSAO && slice.m_bUseSao ? saoParam->bSaoFlag[0] || saoParam->bSaoFlag[1] : false;
716         bool isDBFEnabled = !slice.m_pps->bPicDisableDeblockingFilter;
717 
718         if (isSAOEnabled || isDBFEnabled)
719             WRITE_FLAG(slice.m_sLFaseFlag, "slice_loop_filter_across_slices_enabled_flag");
720     }
721 }
722 
723 /** write wavefront substreams sizes for the slice header */
codeSliceHeaderWPPEntryPoints(const uint32_t * substreamSizes,uint32_t numSubStreams,uint32_t maxOffset)724 void Entropy::codeSliceHeaderWPPEntryPoints(const uint32_t *substreamSizes, uint32_t numSubStreams, uint32_t maxOffset)
725 {
726     uint32_t offsetLen = 1;
727     while (maxOffset >= (1U << offsetLen))
728     {
729         offsetLen++;
730         X265_CHECK(offsetLen < 32, "offsetLen is too large\n");
731     }
732 
733     WRITE_UVLC(numSubStreams, "num_entry_point_offsets");
734     if (numSubStreams > 0)
735         WRITE_UVLC(offsetLen - 1, "offset_len_minus1");
736 
737     for (uint32_t i = 0; i < numSubStreams; i++)
738         WRITE_CODE(substreamSizes[i] - 1, offsetLen, "entry_point_offset_minus1");
739 }
740 
codeShortTermRefPicSet(const RPS & rps,int idx)741 void Entropy::codeShortTermRefPicSet(const RPS& rps, int idx)
742 {
743     if (idx > 0)
744         WRITE_FLAG(0, "inter_ref_pic_set_prediction_flag");
745 
746     WRITE_UVLC(rps.numberOfNegativePictures, "num_negative_pics");
747     WRITE_UVLC(rps.numberOfPositivePictures, "num_positive_pics");
748     int prev = 0;
749     for (int j = 0; j < rps.numberOfNegativePictures; j++)
750     {
751         WRITE_UVLC(prev - rps.deltaPOC[j] - 1, "delta_poc_s0_minus1");
752         prev = rps.deltaPOC[j];
753         WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s0_flag");
754     }
755 
756     prev = 0;
757     for (int j = rps.numberOfNegativePictures; j < rps.numberOfNegativePictures + rps.numberOfPositivePictures; j++)
758     {
759         WRITE_UVLC(rps.deltaPOC[j] - prev - 1, "delta_poc_s1_minus1");
760         prev = rps.deltaPOC[j];
761         WRITE_FLAG(rps.bUsed[j], "used_by_curr_pic_s1_flag");
762     }
763 }
764 
encodeCTU(const CUData & ctu,const CUGeom & cuGeom)765 void Entropy::encodeCTU(const CUData& ctu, const CUGeom& cuGeom)
766 {
767     bool bEncodeDQP = ctu.m_slice->m_pps->bUseDQP;
768     encodeCU(ctu, cuGeom, 0, 0, bEncodeDQP);
769 }
770 
771 /* encode a CU block recursively */
encodeCU(const CUData & ctu,const CUGeom & cuGeom,uint32_t absPartIdx,uint32_t depth,bool & bEncodeDQP)772 void Entropy::encodeCU(const CUData& ctu, const CUGeom& cuGeom, uint32_t absPartIdx, uint32_t depth, bool& bEncodeDQP)
773 {
774     const Slice* slice = ctu.m_slice;
775 
776     int cuSplitFlag = !(cuGeom.flags & CUGeom::LEAF);
777     int cuUnsplitFlag = !(cuGeom.flags & CUGeom::SPLIT_MANDATORY);
778 
779     if (!cuUnsplitFlag)
780     {
781         uint32_t qNumParts = cuGeom.numPartitions >> 2;
782         if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
783             bEncodeDQP = true;
784         for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
785         {
786             const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
787             if (childGeom.flags & CUGeom::PRESENT)
788                 encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
789         }
790         return;
791     }
792 
793     if (cuSplitFlag)
794         codeSplitFlag(ctu, absPartIdx, depth);
795 
796     if (depth < ctu.m_cuDepth[absPartIdx] && depth < ctu.m_encData->m_param->maxCUDepth)
797     {
798         uint32_t qNumParts = cuGeom.numPartitions >> 2;
799         if (depth == slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
800             bEncodeDQP = true;
801         for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
802         {
803             const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + qIdx);
804             encodeCU(ctu, childGeom, absPartIdx, depth + 1, bEncodeDQP);
805         }
806         return;
807     }
808 
809     if (depth <= slice->m_pps->maxCuDQPDepth && slice->m_pps->bUseDQP)
810         bEncodeDQP = true;
811 
812     if (slice->m_pps->bTransquantBypassEnabled)
813         codeCUTransquantBypassFlag(ctu.m_tqBypass[absPartIdx]);
814 
815     if (!slice->isIntra())
816     {
817         codeSkipFlag(ctu, absPartIdx);
818         if (ctu.isSkipped(absPartIdx))
819         {
820             codeMergeIndex(ctu, absPartIdx);
821             finishCU(ctu, absPartIdx, depth, bEncodeDQP);
822             return;
823         }
824         codePredMode(ctu.m_predMode[absPartIdx]);
825     }
826 
827     codePartSize(ctu, absPartIdx, depth);
828 
829     // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
830     codePredInfo(ctu, absPartIdx);
831 
832     uint32_t tuDepthRange[2];
833     if (ctu.isIntra(absPartIdx))
834         ctu.getIntraTUQtDepthRange(tuDepthRange, absPartIdx);
835     else
836         ctu.getInterTUQtDepthRange(tuDepthRange, absPartIdx);
837 
838     // Encode Coefficients, allow codeCoeff() to modify bEncodeDQP
839     codeCoeff(ctu, absPartIdx, bEncodeDQP, tuDepthRange);
840 
841     // --- write terminating bit ---
842     finishCU(ctu, absPartIdx, depth, bEncodeDQP);
843 }
844 
845 /* Return bit count of signaling inter mode */
bitsInterMode(const CUData & cu,uint32_t absPartIdx,uint32_t depth) const846 uint32_t Entropy::bitsInterMode(const CUData& cu, uint32_t absPartIdx, uint32_t depth) const
847 {
848     uint32_t bits;
849     bits = bitsCodeBin(0, m_contextState[OFF_SKIP_FLAG_CTX + cu.getCtxSkipFlag(absPartIdx)]); /* not skip */
850     bits += bitsCodeBin(0, m_contextState[OFF_PRED_MODE_CTX]); /* inter */
851     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
852     switch (partSize)
853     {
854     case SIZE_2Nx2N:
855         bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
856         break;
857 
858     case SIZE_2NxN:
859     case SIZE_2NxnU:
860     case SIZE_2NxnD:
861         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
862         bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
863         if (cu.m_slice->m_sps->maxAMPDepth > depth)
864         {
865             bits += bitsCodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
866             if (partSize != SIZE_2NxN)
867                 bits++; // encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
868         }
869         break;
870 
871     case SIZE_Nx2N:
872     case SIZE_nLx2N:
873     case SIZE_nRx2N:
874         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
875         bits += bitsCodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
876         if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
877             bits += bitsCodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
878         if (cu.m_slice->m_sps->maxAMPDepth > depth)
879         {
880             bits += bitsCodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
881             if (partSize != SIZE_Nx2N)
882                 bits++; // encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
883         }
884         break;
885     default:
886         X265_CHECK(0, "invalid CU partition\n");
887         break;
888     }
889 
890     return bits;
891 }
892 
893 /* finish encoding a cu and handle end-of-slice conditions */
finishCU(const CUData & ctu,uint32_t absPartIdx,uint32_t depth,bool bCodeDQP)894 void Entropy::finishCU(const CUData& ctu, uint32_t absPartIdx, uint32_t depth, bool bCodeDQP)
895 {
896     const Slice* slice = ctu.m_slice;
897     uint32_t realEndAddress = slice->m_endCUAddr;
898     uint32_t cuAddr = ctu.getSCUAddr() + absPartIdx;
899     X265_CHECK(realEndAddress == slice->realEndAddress(slice->m_endCUAddr), "real end address expected\n");
900 
901     uint32_t granularityMask = ctu.m_encData->m_param->maxCUSize - 1;
902     uint32_t cuSize = 1 << ctu.m_log2CUSize[absPartIdx];
903     uint32_t rpelx = ctu.m_cuPelX + g_zscanToPelX[absPartIdx] + cuSize;
904     uint32_t bpely = ctu.m_cuPelY + g_zscanToPelY[absPartIdx] + cuSize;
905     bool granularityBoundary = (((rpelx & granularityMask) == 0 || (rpelx == slice->m_sps->picWidthInLumaSamples )) &&
906                                 ((bpely & granularityMask) == 0 || (bpely == slice->m_sps->picHeightInLumaSamples)));
907 
908     if (slice->m_pps->bUseDQP)
909         const_cast<CUData&>(ctu).setQPSubParts(bCodeDQP ? ctu.getRefQP(absPartIdx) : ctu.m_qp[absPartIdx], absPartIdx, depth);
910 
911     if (granularityBoundary)
912     {
913         // Encode slice finish
914         uint32_t bTerminateSlice = ctu.m_bLastCuInSlice;
915         if (cuAddr + (slice->m_param->num4x4Partitions >> (depth << 1)) == realEndAddress)
916             bTerminateSlice = 1;
917 
918         // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
919         if (!bTerminateSlice)
920             encodeBinTrm(0);    // end_of_slice_segment_flag
921 
922         if (!m_bitIf)
923             resetBits(); // TODO: most likely unnecessary
924     }
925 }
926 
encodeTransform(const CUData & cu,uint32_t absPartIdx,uint32_t curDepth,uint32_t log2CurSize,bool & bCodeDQP,const uint32_t depthRange[2])927 void Entropy::encodeTransform(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
928                               bool& bCodeDQP, const uint32_t depthRange[2])
929 {
930     const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
931 
932     /* in each of these conditions, the subdiv flag is implied and not signaled,
933      * so we have checks to make sure the implied value matches our intentions */
934     if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
935     {
936         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
937     }
938     else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
939              !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
940     {
941         X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
942     }
943     else if (log2CurSize > depthRange[1])
944     {
945         X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
946     }
947     else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
948     {
949         X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
950     }
951     else
952     {
953         X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
954         codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
955     }
956 
957     uint32_t hChromaShift = cu.m_hChromaShift;
958     uint32_t vChromaShift = cu.m_vChromaShift;
959     bool bSmallChroma = (log2CurSize - hChromaShift) < 2;
960     if (!curDepth || !bSmallChroma)
961     {
962         uint32_t parentIdx = absPartIdx & (0xFF << (log2CurSize + 1 - LOG2_UNIT_SIZE) * 2);
963         if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_U, curDepth - 1))
964             codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_U, curDepth, !subdiv);
965         if (!curDepth || cu.getCbf(parentIdx, TEXT_CHROMA_V, curDepth - 1))
966             codeQtCbfChroma(cu, absPartIdx, TEXT_CHROMA_V, curDepth, !subdiv);
967     }
968 
969     if (subdiv)
970     {
971         --log2CurSize;
972         ++curDepth;
973 
974         uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
975 
976         encodeTransform(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
977         encodeTransform(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
978         encodeTransform(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
979         encodeTransform(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
980         return;
981     }
982 
983     uint32_t absPartIdxC = bSmallChroma ? absPartIdx & 0xFC : absPartIdx;
984 
985     if (cu.isInter(absPartIdxC) && !curDepth && !cu.getCbf(absPartIdxC, TEXT_CHROMA_U, 0) && !cu.getCbf(absPartIdxC, TEXT_CHROMA_V, 0))
986     {
987         X265_CHECK(cu.getCbf(absPartIdxC, TEXT_LUMA, 0), "CBF should have been set\n");
988     }
989     else
990         codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
991 
992     uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
993     uint32_t cbfU = cu.getCbf(absPartIdxC, TEXT_CHROMA_U, curDepth);
994     uint32_t cbfV = cu.getCbf(absPartIdxC, TEXT_CHROMA_V, curDepth);
995     if (!(cbfY || cbfU || cbfV))
996         return;
997 
998     // dQP: only for CTU once
999     if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1000     {
1001         uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1002         uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1003         codeDeltaQP(cu, absPartIdxLT);
1004         bCodeDQP = false;
1005     }
1006 
1007     if (cbfY)
1008     {
1009         uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1010         codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1011         if (!(cbfU || cbfV))
1012             return;
1013     }
1014 
1015     if (bSmallChroma)
1016     {
1017         if ((absPartIdx & 3) != 3)
1018             return;
1019 
1020         const uint32_t log2CurSizeC = 2;
1021         const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1022         const uint32_t curPartNum = 4;
1023         uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1024         for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1025         {
1026             TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1027             const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1028             do
1029             {
1030                 if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1031                 {
1032                     uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1033                     codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1034                 }
1035             }
1036             while (tuIterator.isNextSection());
1037         }
1038     }
1039     else
1040     {
1041         uint32_t log2CurSizeC = log2CurSize - hChromaShift;
1042         const bool splitIntoSubTUs = (cu.m_chromaFormat == X265_CSP_I422);
1043         uint32_t curPartNum = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1044         uint32_t coeffOffsetC  = absPartIdxC << (LOG2_UNIT_SIZE * 2 - (hChromaShift + vChromaShift));
1045         for (uint32_t chromaId = TEXT_CHROMA_U; chromaId <= TEXT_CHROMA_V; chromaId++)
1046         {
1047             TURecurse tuIterator(splitIntoSubTUs ? VERTICAL_SPLIT : DONT_SPLIT, curPartNum, absPartIdxC);
1048             const coeff_t* coeffChroma = cu.m_trCoeff[chromaId];
1049             do
1050             {
1051                 if (cu.getCbf(tuIterator.absPartIdxTURelCU, (TextType)chromaId, curDepth + splitIntoSubTUs))
1052                 {
1053                     uint32_t subTUOffset = tuIterator.section << (log2CurSizeC * 2);
1054                     codeCoeffNxN(cu, coeffChroma + coeffOffsetC + subTUOffset, tuIterator.absPartIdxTURelCU, log2CurSizeC, (TextType)chromaId);
1055                 }
1056             }
1057             while (tuIterator.isNextSection());
1058         }
1059     }
1060 }
1061 
encodeTransformLuma(const CUData & cu,uint32_t absPartIdx,uint32_t curDepth,uint32_t log2CurSize,bool & bCodeDQP,const uint32_t depthRange[2])1062 void Entropy::encodeTransformLuma(const CUData& cu, uint32_t absPartIdx, uint32_t curDepth, uint32_t log2CurSize,
1063                               bool& bCodeDQP, const uint32_t depthRange[2])
1064 {
1065     const bool subdiv = cu.m_tuDepth[absPartIdx] > curDepth;
1066 
1067     /* in each of these conditions, the subdiv flag is implied and not signaled,
1068      * so we have checks to make sure the implied value matches our intentions */
1069     if (cu.isIntra(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N && log2CurSize == MIN_LOG2_CU_SIZE)
1070     {
1071         X265_CHECK(subdiv, "intra NxN requires TU depth below CU depth\n");
1072     }
1073     else if (cu.isInter(absPartIdx) && cu.m_partSize[absPartIdx] != SIZE_2Nx2N &&
1074              !curDepth && cu.m_slice->m_sps->quadtreeTUMaxDepthInter == 1)
1075     {
1076         X265_CHECK(subdiv, "inter TU must be smaller than CU when not 2Nx2N part size: log2CurSize %d, depthRange[0] %d\n", log2CurSize, depthRange[0]);
1077     }
1078     else if (log2CurSize > depthRange[1])
1079     {
1080         X265_CHECK(subdiv, "TU is larger than the max allowed, it should have been split\n");
1081     }
1082     else if (log2CurSize == cu.m_slice->m_sps->quadtreeTULog2MinSize || log2CurSize == depthRange[0])
1083     {
1084         X265_CHECK(!subdiv, "min sized TU cannot be subdivided\n");
1085     }
1086     else
1087     {
1088         X265_CHECK(log2CurSize > depthRange[0], "transform size failure\n");
1089         codeTransformSubdivFlag(subdiv, 5 - log2CurSize);
1090     }
1091 
1092     if (subdiv)
1093     {
1094         --log2CurSize;
1095         ++curDepth;
1096 
1097         uint32_t qNumParts = 1 << (log2CurSize - LOG2_UNIT_SIZE) * 2;
1098 
1099         encodeTransformLuma(cu, absPartIdx + 0 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1100         encodeTransformLuma(cu, absPartIdx + 1 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1101         encodeTransformLuma(cu, absPartIdx + 2 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1102         encodeTransformLuma(cu, absPartIdx + 3 * qNumParts, curDepth, log2CurSize, bCodeDQP, depthRange);
1103         return;
1104     }
1105 
1106     if (!cu.isIntra(absPartIdx) && !curDepth)
1107     {
1108         X265_CHECK(cu.getCbf(absPartIdx, TEXT_LUMA, 0), "CBF should have been set\n");
1109     }
1110     else
1111         codeQtCbfLuma(cu.getCbf(absPartIdx, TEXT_LUMA, curDepth), curDepth);
1112 
1113     uint32_t cbfY = cu.getCbf(absPartIdx, TEXT_LUMA, curDepth);
1114 
1115     if (!cbfY)
1116         return;
1117 
1118     // dQP: only for CTU once
1119     if (cu.m_slice->m_pps->bUseDQP && bCodeDQP)
1120     {
1121         uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1122         uint32_t absPartIdxLT = absPartIdx & (0xFF << (log2CUSize - LOG2_UNIT_SIZE) * 2);
1123         codeDeltaQP(cu, absPartIdxLT);
1124         bCodeDQP = false;
1125     }
1126 
1127     if (cbfY)
1128     {
1129         uint32_t coeffOffset = absPartIdx << (LOG2_UNIT_SIZE * 2);
1130         codeCoeffNxN(cu, cu.m_trCoeff[0] + coeffOffset, absPartIdx, log2CurSize, TEXT_LUMA);
1131     }
1132 }
1133 
1134 
codePredInfo(const CUData & cu,uint32_t absPartIdx)1135 void Entropy::codePredInfo(const CUData& cu, uint32_t absPartIdx)
1136 {
1137     if (cu.isIntra(absPartIdx)) // If it is intra mode, encode intra prediction mode.
1138     {
1139         codeIntraDirLumaAng(cu, absPartIdx, true);
1140         if (cu.m_chromaFormat != X265_CSP_I400)
1141         {
1142             uint32_t chromaDirMode[NUM_CHROMA_MODE];
1143             cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1144 
1145             codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1146 
1147             if (cu.m_chromaFormat == X265_CSP_I444 && cu.m_partSize[absPartIdx] != SIZE_2Nx2N)
1148             {
1149                 uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1150                 for (uint32_t qIdx = 1; qIdx < 4; ++qIdx)
1151                 {
1152                     absPartIdx += qNumParts;
1153                     cu.getAllowedChromaDir(absPartIdx, chromaDirMode);
1154                     codeIntraDirChroma(cu, absPartIdx, chromaDirMode);
1155                 }
1156             }
1157         }
1158     }
1159     else // if it is inter mode, encode motion vector and reference index
1160         codePUWise(cu, absPartIdx);
1161 }
1162 
1163 /** encode motion information for every PU block */
codePUWise(const CUData & cu,uint32_t absPartIdx)1164 void Entropy::codePUWise(const CUData& cu, uint32_t absPartIdx)
1165 {
1166     X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1167     uint32_t numPU = cu.getNumPartInter(absPartIdx);
1168 
1169     for (uint32_t puIdx = 0, subPartIdx = absPartIdx; puIdx < numPU; puIdx++, subPartIdx += cu.getPUOffset(puIdx, absPartIdx))
1170     {
1171         codeMergeFlag(cu, subPartIdx);
1172         if (cu.m_mergeFlag[subPartIdx])
1173             codeMergeIndex(cu, subPartIdx);
1174         else
1175         {
1176             if (cu.m_slice->isInterB())
1177                 codeInterDir(cu, subPartIdx);
1178 
1179             uint32_t interDir = cu.m_interDir[subPartIdx];
1180             for (uint32_t list = 0; list < 2; list++)
1181             {
1182                 if (interDir & (1 << list))
1183                 {
1184                     X265_CHECK(cu.m_slice->m_numRefIdx[list] > 0, "numRefs should have been > 0\n");
1185 
1186                     codeRefFrmIdxPU(cu, subPartIdx, list);
1187                     codeMvd(cu, subPartIdx, list);
1188                     codeMVPIdx(cu.m_mvpIdx[list][subPartIdx]);
1189                 }
1190             }
1191         }
1192     }
1193 }
1194 
1195 /** encode reference frame index for a PU block */
codeRefFrmIdxPU(const CUData & cu,uint32_t absPartIdx,int list)1196 void Entropy::codeRefFrmIdxPU(const CUData& cu, uint32_t absPartIdx, int list)
1197 {
1198     X265_CHECK(!cu.isIntra(absPartIdx), "intra block not expected\n");
1199 
1200     if (cu.m_slice->m_numRefIdx[list] > 1)
1201         codeRefFrmIdx(cu, absPartIdx, list);
1202 }
1203 
codeCoeff(const CUData & cu,uint32_t absPartIdx,bool & bCodeDQP,const uint32_t depthRange[2])1204 void Entropy::codeCoeff(const CUData& cu, uint32_t absPartIdx, bool& bCodeDQP, const uint32_t depthRange[2])
1205 {
1206     if (!cu.isIntra(absPartIdx))
1207     {
1208         if (!(cu.m_mergeFlag[absPartIdx] && cu.m_partSize[absPartIdx] == SIZE_2Nx2N))
1209             codeQtRootCbf(cu.getQtRootCbf(absPartIdx));
1210         if (!cu.getQtRootCbf(absPartIdx))
1211             return;
1212     }
1213 
1214     uint32_t log2CUSize = cu.m_log2CUSize[absPartIdx];
1215     if (cu.m_chromaFormat == X265_CSP_I400)
1216         encodeTransformLuma(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1217     else
1218         encodeTransform(cu, absPartIdx, 0, log2CUSize, bCodeDQP, depthRange);
1219 }
1220 
codeSaoOffset(const SaoCtuParam & ctuParam,int plane)1221 void Entropy::codeSaoOffset(const SaoCtuParam& ctuParam, int plane)
1222 {
1223     int typeIdx = ctuParam.typeIdx;
1224 
1225     if (plane != 2)
1226     {
1227         encodeBin(typeIdx >= 0, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1228         if (typeIdx >= 0)
1229             encodeBinEP(typeIdx < SAO_BO ? 1 : 0);
1230     }
1231 
1232     if (typeIdx >= 0)
1233     {
1234         enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1235         if (typeIdx == SAO_BO)
1236         {
1237             for (int i = 0; i < SAO_NUM_OFFSET; i++)
1238                 codeSaoMaxUvlc(abs(ctuParam.offset[i]), OFFSET_THRESH - 1);
1239 
1240             for (int i = 0; i < SAO_NUM_OFFSET; i++)
1241                 if (ctuParam.offset[i] != 0)
1242                     encodeBinEP(ctuParam.offset[i] < 0);
1243 
1244             encodeBinsEP(ctuParam.bandPos, 5);
1245         }
1246         else // if (typeIdx < SAO_BO)
1247         {
1248             codeSaoMaxUvlc(ctuParam.offset[0], OFFSET_THRESH - 1);
1249             codeSaoMaxUvlc(ctuParam.offset[1], OFFSET_THRESH - 1);
1250             codeSaoMaxUvlc(-ctuParam.offset[2], OFFSET_THRESH - 1);
1251             codeSaoMaxUvlc(-ctuParam.offset[3], OFFSET_THRESH - 1);
1252             if (plane != 2)
1253                 encodeBinsEP((uint32_t)(typeIdx), 2);
1254         }
1255     }
1256 }
1257 
codeSaoOffsetEO(int * offset,int typeIdx,int plane)1258 void Entropy::codeSaoOffsetEO(int *offset, int typeIdx, int plane)
1259 {
1260     if (plane != 2)
1261     {
1262         encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1263         encodeBinEP(1);
1264     }
1265 
1266     enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1267 
1268     codeSaoMaxUvlc(offset[0], OFFSET_THRESH - 1);
1269     codeSaoMaxUvlc(offset[1], OFFSET_THRESH - 1);
1270     codeSaoMaxUvlc(-offset[2], OFFSET_THRESH - 1);
1271     codeSaoMaxUvlc(-offset[3], OFFSET_THRESH - 1);
1272     if (plane != 2)
1273         encodeBinsEP((uint32_t)(typeIdx), 2);
1274 }
1275 
codeSaoOffsetBO(int * offset,int bandPos,int plane)1276 void Entropy::codeSaoOffsetBO(int *offset, int bandPos, int plane)
1277 {
1278     if (plane != 2)
1279     {
1280         encodeBin(1, m_contextState[OFF_SAO_TYPE_IDX_CTX]);
1281         encodeBinEP(0);
1282     }
1283 
1284     enum { OFFSET_THRESH = 1 << X265_MIN(X265_DEPTH - 5, 5) };
1285 
1286     for (int i = 0; i < SAO_NUM_OFFSET; i++)
1287         codeSaoMaxUvlc(abs(offset[i]), OFFSET_THRESH - 1);
1288 
1289     for (int i = 0; i < SAO_NUM_OFFSET; i++)
1290         if (offset[i] != 0)
1291             encodeBinEP(offset[i] < 0);
1292 
1293     encodeBinsEP(bandPos, 5);
1294 }
1295 
1296 /** initialize context model with respect to QP and initialization value */
sbacInit(int qp,int initValue)1297 uint8_t sbacInit(int qp, int initValue)
1298 {
1299     qp = x265_clip3(QP_MIN, QP_MAX_SPEC, qp);
1300 
1301     int  slope      = (initValue >> 4) * 5 - 45;
1302     int  offset     = ((initValue & 15) << 3) - 16;
1303     int  initState  =  X265_MIN(X265_MAX(1, (((slope * qp) >> 4) + offset)), 126);
1304     uint32_t mpState = (initState >= 64);
1305     uint32_t state = ((mpState ? (initState - 64) : (63 - initState)) << 1) + mpState;
1306 
1307     return (uint8_t)state;
1308 }
1309 
initBuffer(uint8_t * contextModel,SliceType sliceType,int qp,uint8_t * ctxModel,int size)1310 static void initBuffer(uint8_t* contextModel, SliceType sliceType, int qp, uint8_t* ctxModel, int size)
1311 {
1312     ctxModel += sliceType * size;
1313 
1314     for (int n = 0; n < size; n++)
1315         contextModel[n] = sbacInit(qp, ctxModel[n]);
1316 }
1317 
resetEntropy(const Slice & slice)1318 void Entropy::resetEntropy(const Slice& slice)
1319 {
1320     int  qp              = slice.m_sliceQp;
1321     SliceType sliceType  = slice.m_sliceType;
1322 
1323     initBuffer(&m_contextState[OFF_SPLIT_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SPLIT_FLAG, NUM_SPLIT_FLAG_CTX);
1324     initBuffer(&m_contextState[OFF_SKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SKIP_FLAG, NUM_SKIP_FLAG_CTX);
1325     initBuffer(&m_contextState[OFF_MERGE_FLAG_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_FLAG_EXT, NUM_MERGE_FLAG_EXT_CTX);
1326     initBuffer(&m_contextState[OFF_MERGE_IDX_EXT_CTX], sliceType, qp, (uint8_t*)INIT_MERGE_IDX_EXT, NUM_MERGE_IDX_EXT_CTX);
1327     initBuffer(&m_contextState[OFF_PART_SIZE_CTX], sliceType, qp, (uint8_t*)INIT_PART_SIZE, NUM_PART_SIZE_CTX);
1328     initBuffer(&m_contextState[OFF_PRED_MODE_CTX], sliceType, qp, (uint8_t*)INIT_PRED_MODE, NUM_PRED_MODE_CTX);
1329     initBuffer(&m_contextState[OFF_ADI_CTX], sliceType, qp, (uint8_t*)INIT_INTRA_PRED_MODE, NUM_ADI_CTX);
1330     initBuffer(&m_contextState[OFF_CHROMA_PRED_CTX], sliceType, qp, (uint8_t*)INIT_CHROMA_PRED_MODE, NUM_CHROMA_PRED_CTX);
1331     initBuffer(&m_contextState[OFF_DELTA_QP_CTX], sliceType, qp, (uint8_t*)INIT_DQP, NUM_DELTA_QP_CTX);
1332     initBuffer(&m_contextState[OFF_INTER_DIR_CTX], sliceType, qp, (uint8_t*)INIT_INTER_DIR, NUM_INTER_DIR_CTX);
1333     initBuffer(&m_contextState[OFF_REF_NO_CTX], sliceType, qp, (uint8_t*)INIT_REF_PIC, NUM_REF_NO_CTX);
1334     initBuffer(&m_contextState[OFF_MV_RES_CTX], sliceType, qp, (uint8_t*)INIT_MVD, NUM_MV_RES_CTX);
1335     initBuffer(&m_contextState[OFF_QT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_CBF, NUM_QT_CBF_CTX);
1336     initBuffer(&m_contextState[OFF_TRANS_SUBDIV_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANS_SUBDIV_FLAG, NUM_TRANS_SUBDIV_FLAG_CTX);
1337     initBuffer(&m_contextState[OFF_QT_ROOT_CBF_CTX], sliceType, qp, (uint8_t*)INIT_QT_ROOT_CBF, NUM_QT_ROOT_CBF_CTX);
1338     initBuffer(&m_contextState[OFF_SIG_CG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_CG_FLAG, 2 * NUM_SIG_CG_FLAG_CTX);
1339     initBuffer(&m_contextState[OFF_SIG_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SIG_FLAG, NUM_SIG_FLAG_CTX);
1340     initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_X], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1341     initBuffer(&m_contextState[OFF_CTX_LAST_FLAG_Y], sliceType, qp, (uint8_t*)INIT_LAST, NUM_CTX_LAST_FLAG_XY);
1342     initBuffer(&m_contextState[OFF_ONE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ONE_FLAG, NUM_ONE_FLAG_CTX);
1343     initBuffer(&m_contextState[OFF_ABS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_ABS_FLAG, NUM_ABS_FLAG_CTX);
1344     initBuffer(&m_contextState[OFF_MVP_IDX_CTX], sliceType, qp, (uint8_t*)INIT_MVP_IDX, NUM_MVP_IDX_CTX);
1345     initBuffer(&m_contextState[OFF_SAO_MERGE_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_SAO_MERGE_FLAG, NUM_SAO_MERGE_FLAG_CTX);
1346     initBuffer(&m_contextState[OFF_SAO_TYPE_IDX_CTX], sliceType, qp, (uint8_t*)INIT_SAO_TYPE_IDX, NUM_SAO_TYPE_IDX_CTX);
1347     initBuffer(&m_contextState[OFF_TRANSFORMSKIP_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_TRANSFORMSKIP_FLAG, 2 * NUM_TRANSFORMSKIP_FLAG_CTX);
1348     initBuffer(&m_contextState[OFF_TQUANT_BYPASS_FLAG_CTX], sliceType, qp, (uint8_t*)INIT_CU_TRANSQUANT_BYPASS_FLAG, NUM_TQUANT_BYPASS_FLAG_CTX);
1349     // new structure
1350 
1351     start();
1352 }
1353 
1354 /* code explicit wp tables */
codePredWeightTable(const Slice & slice)1355 void Entropy::codePredWeightTable(const Slice& slice)
1356 {
1357     const WeightParam *wp;
1358     bool            bChroma = slice.m_sps->chromaFormatIdc != X265_CSP_I400;
1359     bool            bDenomCoded  = false;
1360     int             numRefDirs   = slice.m_sliceType == B_SLICE ? 2 : 1;
1361     uint32_t        totalSignalledWeightFlags = 0;
1362 
1363     if ((slice.m_sliceType == P_SLICE && slice.m_pps->bUseWeightPred) ||
1364         (slice.m_sliceType == B_SLICE && slice.m_pps->bUseWeightedBiPred))
1365     {
1366         for (int list = 0; list < numRefDirs; list++)
1367         {
1368             for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1369             {
1370                 wp = slice.m_weightPredTable[list][ref];
1371                 if (!bDenomCoded)
1372                 {
1373                     WRITE_UVLC(wp[0].log2WeightDenom, "luma_log2_weight_denom");
1374 
1375                     if (bChroma)
1376                     {
1377                         int deltaDenom = wp[1].log2WeightDenom - wp[0].log2WeightDenom;
1378                         WRITE_SVLC(deltaDenom, "delta_chroma_log2_weight_denom");
1379                     }
1380                     bDenomCoded = true;
1381                 }
1382                 WRITE_FLAG(!!wp[0].wtPresent, "luma_weight_lX_flag");
1383                 totalSignalledWeightFlags += wp[0].wtPresent;
1384             }
1385 
1386             if (bChroma)
1387             {
1388                 for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1389                 {
1390                     wp = slice.m_weightPredTable[list][ref];
1391                     WRITE_FLAG(!!wp[1].wtPresent, "chroma_weight_lX_flag");
1392                     totalSignalledWeightFlags += 2 * wp[1].wtPresent;
1393                 }
1394             }
1395 
1396             for (int ref = 0; ref < slice.m_numRefIdx[list]; ref++)
1397             {
1398                 wp = slice.m_weightPredTable[list][ref];
1399                 if (wp[0].wtPresent)
1400                 {
1401                     int deltaWeight = (wp[0].inputWeight - (1 << wp[0].log2WeightDenom));
1402                     WRITE_SVLC(deltaWeight, "delta_luma_weight_lX");
1403                     WRITE_SVLC(wp[0].inputOffset, "luma_offset_lX");
1404                 }
1405 
1406                 if (bChroma)
1407                 {
1408                     if (wp[1].wtPresent)
1409                     {
1410                         for (int plane = 1; plane < 3; plane++)
1411                         {
1412                             int deltaWeight = (wp[plane].inputWeight - (1 << wp[1].log2WeightDenom));
1413                             WRITE_SVLC(deltaWeight, "delta_chroma_weight_lX");
1414 
1415                             int pred = (128 - ((128 * wp[plane].inputWeight) >> (wp[plane].log2WeightDenom)));
1416                             int deltaChroma = (wp[plane].inputOffset - pred);
1417                             WRITE_SVLC(deltaChroma, "delta_chroma_offset_lX");
1418                         }
1419                     }
1420                 }
1421             }
1422         }
1423 
1424         X265_CHECK(totalSignalledWeightFlags <= 24, "total weights must be <= 24\n");
1425     }
1426 }
1427 
writeUnaryMaxSymbol(uint32_t symbol,uint8_t * scmModel,int offset,uint32_t maxSymbol)1428 void Entropy::writeUnaryMaxSymbol(uint32_t symbol, uint8_t* scmModel, int offset, uint32_t maxSymbol)
1429 {
1430     X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
1431 
1432     encodeBin(symbol ? 1 : 0, scmModel[0]);
1433 
1434     if (!symbol)
1435         return;
1436 
1437     bool bCodeLast = (maxSymbol > symbol);
1438 
1439     while (--symbol)
1440         encodeBin(1, scmModel[offset]);
1441 
1442     if (bCodeLast)
1443         encodeBin(0, scmModel[offset]);
1444 }
1445 
writeEpExGolomb(uint32_t symbol,uint32_t count)1446 void Entropy::writeEpExGolomb(uint32_t symbol, uint32_t count)
1447 {
1448     uint32_t bins = 0;
1449     int numBins = 0;
1450 
1451     while (symbol >= (uint32_t)(1 << count))
1452     {
1453         bins = 2 * bins + 1;
1454         numBins++;
1455         symbol -= 1 << count;
1456         count++;
1457     }
1458 
1459     bins = 2 * bins + 0;
1460     numBins++;
1461 
1462     bins = (bins << count) | symbol;
1463     numBins += count;
1464 
1465     X265_CHECK(numBins <= 32, "numBins too large\n");
1466     encodeBinsEP(bins, numBins);
1467 }
1468 
1469 /** Coding of coeff_abs_level_minus3 */
writeCoefRemainExGolomb(uint32_t codeNumber,uint32_t absGoRice)1470 void Entropy::writeCoefRemainExGolomb(uint32_t codeNumber, uint32_t absGoRice)
1471 {
1472     uint32_t length;
1473     const uint32_t codeRemain = codeNumber & ((1 << absGoRice) - 1);
1474 
1475     if ((codeNumber >> absGoRice) < COEF_REMAIN_BIN_REDUCTION)
1476     {
1477         length = codeNumber >> absGoRice;
1478 
1479         X265_CHECK(codeNumber - (length << absGoRice) == (codeNumber & ((1 << absGoRice) - 1)), "codeNumber failure\n");
1480         X265_CHECK(length + 1 + absGoRice < 32, "length failure\n");
1481         encodeBinsEP((((1 << (length + 1)) - 2) << absGoRice) + codeRemain, length + 1 + absGoRice);
1482     }
1483     else
1484     {
1485         length = 0;
1486         codeNumber = (codeNumber >> absGoRice) - COEF_REMAIN_BIN_REDUCTION;
1487         {
1488             unsigned long idx;
1489             CLZ(idx, codeNumber + 1);
1490             length = idx;
1491             X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");
1492             codeNumber -= (1 << idx) - 1;
1493         }
1494         codeNumber = (codeNumber << absGoRice) + codeRemain;
1495 
1496         encodeBinsEP((1 << (COEF_REMAIN_BIN_REDUCTION + length + 1)) - 2, COEF_REMAIN_BIN_REDUCTION + length + 1);
1497         encodeBinsEP(codeNumber, length + absGoRice);
1498     }
1499 }
1500 
1501 // SBAC RD
loadIntraDirModeLuma(const Entropy & src)1502 void Entropy::loadIntraDirModeLuma(const Entropy& src)
1503 {
1504     X265_CHECK(src.m_valid, "invalid copy source context\n");
1505     m_fracBits = src.m_fracBits;
1506     m_contextState[OFF_ADI_CTX] = src.m_contextState[OFF_ADI_CTX];
1507 }
1508 
copyFrom(const Entropy & src)1509 void Entropy::copyFrom(const Entropy& src)
1510 {
1511     X265_CHECK(src.m_valid, "invalid copy source context\n");
1512 
1513     copyState(src);
1514 
1515     memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(uint8_t));
1516     markValid();
1517 }
1518 
codePartSize(const CUData & cu,uint32_t absPartIdx,uint32_t depth)1519 void Entropy::codePartSize(const CUData& cu, uint32_t absPartIdx, uint32_t depth)
1520 {
1521     PartSize partSize = (PartSize)cu.m_partSize[absPartIdx];
1522 
1523     if (cu.isIntra(absPartIdx))
1524     {
1525         if (depth == cu.m_encData->m_param->maxCUDepth)
1526             encodeBin(partSize == SIZE_2Nx2N ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX]);
1527         return;
1528     }
1529 
1530     switch (partSize)
1531     {
1532     case SIZE_2Nx2N:
1533         encodeBin(1, m_contextState[OFF_PART_SIZE_CTX]);
1534         break;
1535 
1536     case SIZE_2NxN:
1537     case SIZE_2NxnU:
1538     case SIZE_2NxnD:
1539         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1540         encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 1]);
1541         if (cu.m_slice->m_sps->maxAMPDepth > depth)
1542         {
1543             encodeBin((partSize == SIZE_2NxN) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1544             if (partSize != SIZE_2NxN)
1545                 encodeBinEP((partSize == SIZE_2NxnU ? 0 : 1));
1546         }
1547         break;
1548 
1549     case SIZE_Nx2N:
1550     case SIZE_nLx2N:
1551     case SIZE_nRx2N:
1552         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 0]);
1553         encodeBin(0, m_contextState[OFF_PART_SIZE_CTX + 1]);
1554         if (depth == cu.m_encData->m_param->maxCUDepth && !(cu.m_log2CUSize[absPartIdx] == 3))
1555             encodeBin(1, m_contextState[OFF_PART_SIZE_CTX + 2]);
1556         if (cu.m_slice->m_sps->maxAMPDepth > depth)
1557         {
1558             encodeBin((partSize == SIZE_Nx2N) ? 1 : 0, m_contextState[OFF_PART_SIZE_CTX + 3]);
1559             if (partSize != SIZE_Nx2N)
1560                 encodeBinEP((partSize == SIZE_nLx2N ? 0 : 1));
1561         }
1562         break;
1563     default:
1564         X265_CHECK(0, "invalid CU partition\n");
1565         break;
1566     }
1567 }
1568 
codeMergeIndex(const CUData & cu,uint32_t absPartIdx)1569 void Entropy::codeMergeIndex(const CUData& cu, uint32_t absPartIdx)
1570 {
1571     uint32_t numCand = cu.m_slice->m_maxNumMergeCand;
1572 
1573     if (numCand > 1)
1574     {
1575         uint32_t unaryIdx = cu.m_mvpIdx[0][absPartIdx]; // merge candidate index was stored in L0 MVP idx
1576         encodeBin((unaryIdx != 0), m_contextState[OFF_MERGE_IDX_EXT_CTX]);
1577 
1578         X265_CHECK(unaryIdx < numCand, "unaryIdx out of range\n");
1579 
1580         if (unaryIdx != 0)
1581         {
1582             uint32_t mask = (1 << unaryIdx) - 2;
1583             mask >>= (unaryIdx == numCand - 1) ? 1 : 0;
1584             encodeBinsEP(mask, unaryIdx - (unaryIdx == numCand - 1));
1585         }
1586     }
1587 }
1588 
codeIntraDirLumaAng(const CUData & cu,uint32_t absPartIdx,bool isMultiple)1589 void Entropy::codeIntraDirLumaAng(const CUData& cu, uint32_t absPartIdx, bool isMultiple)
1590 {
1591     uint32_t dir[4], j;
1592     uint32_t preds[4][3];
1593     int predIdx[4];
1594     uint32_t partNum = isMultiple && cu.m_partSize[absPartIdx] != SIZE_2Nx2N ? 4 : 1;
1595     uint32_t qNumParts = 1 << (cu.m_log2CUSize[absPartIdx] - 1 - LOG2_UNIT_SIZE) * 2;
1596 
1597     for (j = 0; j < partNum; j++, absPartIdx += qNumParts)
1598     {
1599         dir[j] = cu.m_lumaIntraDir[absPartIdx];
1600         cu.getIntraDirLumaPredictor(absPartIdx, preds[j]);
1601         predIdx[j] = -1;
1602         for (uint32_t i = 0; i < 3; i++)
1603             if (dir[j] == preds[j][i])
1604                 predIdx[j] = i;
1605 
1606         encodeBin((predIdx[j] != -1) ? 1 : 0, m_contextState[OFF_ADI_CTX]);
1607     }
1608 
1609     for (j = 0; j < partNum; j++)
1610     {
1611         if (predIdx[j] != -1)
1612         {
1613             X265_CHECK((predIdx[j] >= 0) && (predIdx[j] <= 2), "predIdx out of range\n");
1614             // NOTE: Mapping
1615             //       0 = 0
1616             //       1 = 10
1617             //       2 = 11
1618             int nonzero = (!!predIdx[j]);
1619             encodeBinsEP(predIdx[j] + nonzero, 1 + nonzero);
1620         }
1621         else
1622         {
1623             if (preds[j][0] > preds[j][1])
1624                 std::swap(preds[j][0], preds[j][1]);
1625 
1626             if (preds[j][0] > preds[j][2])
1627                 std::swap(preds[j][0], preds[j][2]);
1628 
1629             if (preds[j][1] > preds[j][2])
1630                 std::swap(preds[j][1], preds[j][2]);
1631 
1632             dir[j] += (dir[j] > preds[j][2]) ? -1 : 0;
1633             dir[j] += (dir[j] > preds[j][1]) ? -1 : 0;
1634             dir[j] += (dir[j] > preds[j][0]) ? -1 : 0;
1635 
1636             encodeBinsEP(dir[j], 5);
1637         }
1638     }
1639 }
1640 
codeIntraDirChroma(const CUData & cu,uint32_t absPartIdx,uint32_t * chromaDirMode)1641 void Entropy::codeIntraDirChroma(const CUData& cu, uint32_t absPartIdx, uint32_t *chromaDirMode)
1642 {
1643     uint32_t intraDirChroma = cu.m_chromaIntraDir[absPartIdx];
1644 
1645     if (intraDirChroma == DM_CHROMA_IDX)
1646         encodeBin(0, m_contextState[OFF_CHROMA_PRED_CTX]);
1647     else
1648     {
1649         for (int i = 0; i < NUM_CHROMA_MODE - 1; i++)
1650         {
1651             if (intraDirChroma == chromaDirMode[i])
1652             {
1653                 intraDirChroma = i;
1654                 break;
1655             }
1656         }
1657 
1658         encodeBin(1, m_contextState[OFF_CHROMA_PRED_CTX]);
1659         encodeBinsEP(intraDirChroma, 2);
1660     }
1661 }
1662 
codeInterDir(const CUData & cu,uint32_t absPartIdx)1663 void Entropy::codeInterDir(const CUData& cu, uint32_t absPartIdx)
1664 {
1665     const uint32_t interDir = cu.m_interDir[absPartIdx] - 1;
1666     const uint32_t ctx      = cu.m_cuDepth[absPartIdx]; // the context of the inter dir is the depth of the CU
1667 
1668     if (cu.m_partSize[absPartIdx] == SIZE_2Nx2N || cu.m_log2CUSize[absPartIdx] != 3)
1669         encodeBin(interDir == 2 ? 1 : 0, m_contextState[OFF_INTER_DIR_CTX + ctx]);
1670     if (interDir < 2)
1671         encodeBin(interDir, m_contextState[OFF_INTER_DIR_CTX + 4]);
1672 }
1673 
codeRefFrmIdx(const CUData & cu,uint32_t absPartIdx,int list)1674 void Entropy::codeRefFrmIdx(const CUData& cu, uint32_t absPartIdx, int list)
1675 {
1676     uint32_t refFrame = cu.m_refIdx[list][absPartIdx];
1677 
1678     encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX]);
1679 
1680     if (refFrame > 0)
1681     {
1682         uint32_t refNum = cu.m_slice->m_numRefIdx[list] - 2;
1683         if (refNum == 0)
1684             return;
1685 
1686         refFrame--;
1687         encodeBin(refFrame > 0, m_contextState[OFF_REF_NO_CTX + 1]);
1688         if (refFrame > 0)
1689         {
1690             uint32_t mask = (1 << refFrame) - 2;
1691             mask >>= (refFrame == refNum) ? 1 : 0;
1692             encodeBinsEP(mask, refFrame - (refFrame == refNum));
1693         }
1694     }
1695 }
1696 
codeMvd(const CUData & cu,uint32_t absPartIdx,int list)1697 void Entropy::codeMvd(const CUData& cu, uint32_t absPartIdx, int list)
1698 {
1699     const MV& mvd = cu.m_mvd[list][absPartIdx];
1700     const int hor = mvd.x;
1701     const int ver = mvd.y;
1702 
1703     encodeBin(hor != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1704     encodeBin(ver != 0 ? 1 : 0, m_contextState[OFF_MV_RES_CTX]);
1705 
1706     const bool bHorAbsGr0 = hor != 0;
1707     const bool bVerAbsGr0 = ver != 0;
1708     const uint32_t horAbs   = 0 > hor ? -hor : hor;
1709     const uint32_t verAbs   = 0 > ver ? -ver : ver;
1710 
1711     if (bHorAbsGr0)
1712         encodeBin(horAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1713 
1714     if (bVerAbsGr0)
1715         encodeBin(verAbs > 1 ? 1 : 0, m_contextState[OFF_MV_RES_CTX + 1]);
1716 
1717     if (bHorAbsGr0)
1718     {
1719         if (horAbs > 1)
1720             writeEpExGolomb(horAbs - 2, 1);
1721 
1722         encodeBinEP(0 > hor ? 1 : 0);
1723     }
1724 
1725     if (bVerAbsGr0)
1726     {
1727         if (verAbs > 1)
1728             writeEpExGolomb(verAbs - 2, 1);
1729 
1730         encodeBinEP(0 > ver ? 1 : 0);
1731     }
1732 }
1733 
codeDeltaQP(const CUData & cu,uint32_t absPartIdx)1734 void Entropy::codeDeltaQP(const CUData& cu, uint32_t absPartIdx)
1735 {
1736     int dqp = cu.m_qp[absPartIdx] - cu.getRefQP(absPartIdx);
1737 
1738     int qpBdOffsetY = QP_BD_OFFSET;
1739 
1740     dqp = (dqp + 78 + qpBdOffsetY + (qpBdOffsetY / 2)) % (52 + qpBdOffsetY) - 26 - (qpBdOffsetY / 2);
1741 
1742     uint32_t absDQp = (uint32_t)((dqp > 0) ? dqp  : (-dqp));
1743     uint32_t TUValue = X265_MIN((int)absDQp, CU_DQP_TU_CMAX);
1744     writeUnaryMaxSymbol(TUValue, &m_contextState[OFF_DELTA_QP_CTX], 1, CU_DQP_TU_CMAX);
1745     if (absDQp >= CU_DQP_TU_CMAX)
1746         writeEpExGolomb(absDQp - CU_DQP_TU_CMAX, CU_DQP_EG_k);
1747 
1748     if (absDQp > 0)
1749     {
1750         uint32_t sign = (dqp > 0 ? 0 : 1);
1751         encodeBinEP(sign);
1752     }
1753 }
1754 
codeQtCbfChroma(const CUData & cu,uint32_t absPartIdx,TextType ttype,uint32_t tuDepth,bool lowestLevel)1755 void Entropy::codeQtCbfChroma(const CUData& cu, uint32_t absPartIdx, TextType ttype, uint32_t tuDepth, bool lowestLevel)
1756 {
1757     uint32_t ctx = tuDepth + 2;
1758 
1759     uint32_t log2TrSize = cu.m_log2CUSize[absPartIdx] - tuDepth;
1760     bool canQuadSplit       = (log2TrSize - cu.m_hChromaShift > 2);
1761     uint32_t lowestTUDepth  = tuDepth + ((!lowestLevel && !canQuadSplit) ? 1 : 0); // unsplittable TUs inherit their parent's CBF
1762 
1763     if (cu.m_chromaFormat == X265_CSP_I422 && (lowestLevel || !canQuadSplit)) // if sub-TUs are present
1764     {
1765         uint32_t subTUDepth        = lowestTUDepth + 1;   // if this is the lowest level of the TU-tree, the sub-TUs are directly below.
1766                                                           // Otherwise, this must be the level above the lowest level (as specified above)
1767         uint32_t tuNumParts = 1 << ((log2TrSize - LOG2_UNIT_SIZE) * 2 - 1);
1768 
1769         encodeBin(cu.getCbf(absPartIdx             , ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1770         encodeBin(cu.getCbf(absPartIdx + tuNumParts, ttype, subTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1771     }
1772     else
1773         encodeBin(cu.getCbf(absPartIdx, ttype, lowestTUDepth), m_contextState[OFF_QT_CBF_CTX + ctx]);
1774 }
1775 
#if CHECKED_BUILD || _DEBUG
/* Debug-only reference: count the bypass bins needed to code the remaining
 * levels of the coefficients absCoeff[0 .. numNonZero-1].
 *
 * For every coefficient whose magnitude reaches its base level, the length of
 * the code that writeCoefRemainExGolomb() would emit is added to the total,
 * and the Rice parameter is adapted as the loop goes (it never exceeds 4).
 * At least one coefficient is always processed, even if numNonZero <= 0. */
uint32_t costCoeffRemain_c0(uint16_t *absCoeff, int numNonZero)
{
    uint32_t riceParam = 0;
    int firstCoeff2 = 1;
    uint32_t packedBaseLevels = 0x5555AAAA; // 2-bits encode format baseLevel
    uint32_t totalBins = 0;

    int idx = 0;
    do
    {
        const int baseLevel = (packedBaseLevels & 3) | firstCoeff2;
        X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
        packedBaseLevels >>= 2;

        int codeNumber = absCoeff[idx] - baseLevel;
        if (codeNumber >= 0)
        {
            // mirrors writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, riceParam);
            // the subtraction is done unsigned and goes negative (after the
            // conversion back to int) for the short-prefix case
            codeNumber = ((uint32_t)codeNumber >> riceParam) - COEF_REMAIN_BIN_REDUCTION;
            if (codeNumber >= 0)
            {
                unsigned long msb;
                CLZ(msb, codeNumber + 1);
                const uint32_t length = msb;
                X265_CHECK((codeNumber != 0) || (length == 0), "length check failure\n");

                codeNumber = (length + length);
            }
            totalBins += (COEF_REMAIN_BIN_REDUCTION + 1 + riceParam + codeNumber);

            if (absCoeff[idx] > (COEF_REMAIN_BIN_REDUCTION << riceParam))
                riceParam = (riceParam + 1) - (riceParam >> 2);
            X265_CHECK(riceParam <= 4, "goRiceParam check failure\n");
        }

        if (absCoeff[idx] >= 2)
            firstCoeff2 = 0;

        idx++;
    }
    while (idx < numNonZero);

    return totalBins;
}
#endif // debug only code
1824 
codeCoeffNxN(const CUData & cu,const coeff_t * coeff,uint32_t absPartIdx,uint32_t log2TrSize,TextType ttype)1825 void Entropy::codeCoeffNxN(const CUData& cu, const coeff_t* coeff, uint32_t absPartIdx, uint32_t log2TrSize, TextType ttype)
1826 {
1827     uint32_t trSize = 1 << log2TrSize;
1828     uint32_t tqBypass = cu.m_tqBypass[absPartIdx];
1829     // compute number of significant coefficients
1830     uint32_t numSig = primitives.cu[log2TrSize - 2].count_nonzero(coeff);
1831     X265_CHECK(numSig > 0, "cbf check fail\n");
1832     bool bHideFirstSign = cu.m_slice->m_pps->bSignHideEnabled & !tqBypass;
1833 
1834     if (log2TrSize <= MAX_LOG2_TS_SIZE && !tqBypass && cu.m_slice->m_pps->bTransformSkipEnabled)
1835         codeTransformSkipFlags(cu.m_transformSkip[ttype][absPartIdx], ttype);
1836 
1837     bool bIsLuma = ttype == TEXT_LUMA;
1838 
1839     // select scans
1840     TUEntropyCodingParameters codingParameters;
1841     cu.getTUEntropyCodingParameters(codingParameters, absPartIdx, log2TrSize, bIsLuma);
1842 
1843     uint8_t coeffNum[MLS_GRP_NUM];      // value range[0, 16]
1844     uint16_t coeffSign[MLS_GRP_NUM];    // bit mask map for non-zero coeff sign
1845     uint16_t coeffFlag[MLS_GRP_NUM];    // bit mask map for non-zero coeff
1846 
1847     //----- encode significance map -----
1848 
1849     // Find position of last coefficient
1850     int scanPosLast = 0;
1851     uint32_t posLast;
1852     uint64_t sigCoeffGroupFlag64 = 0;
1853     //const uint32_t maskPosXY = ((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1;
1854     X265_CHECK((uint32_t)((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1) == (((uint32_t)~0 >> (31 - log2TrSize + MLS_CG_LOG2_SIZE)) >> 1), "maskPosXY fault\n");
1855 
1856     scanPosLast = primitives.scanPosLast(codingParameters.scan, coeff, coeffSign, coeffFlag, coeffNum, numSig, g_scan4x4[codingParameters.scanType], trSize);
1857     posLast = codingParameters.scan[scanPosLast];
1858 
1859     const int lastScanSet = scanPosLast >> MLS_CG_SIZE;
1860 
1861     // Calculate CG block non-zero mask, the latest CG always flag as non-zero in CG scan loop
1862     for(int idx = 0; idx < lastScanSet; idx++)
1863     {
1864         const uint8_t subSet = (uint8_t)codingParameters.scanCG[idx];
1865         const uint8_t nonZero = (coeffNum[idx] != 0);
1866         sigCoeffGroupFlag64 |= ((nonZero ? (uint64_t)1 : 0) << subSet);
1867     }
1868 
1869 
1870     // Code position of last coefficient
1871     {
1872         // The last position is composed of a prefix and suffix.
1873         // The prefix is context coded truncated unary bins. The suffix is bypass coded fixed length bins.
1874         // The bypass coded bins for both the x and y components are grouped together.
1875         uint32_t packedSuffixBits = 0, packedSuffixLen = 0;
1876         uint32_t pos[2] = { (posLast & (trSize - 1)), (posLast >> log2TrSize) };
1877         // swap
1878         if (codingParameters.scanType == SCAN_VER)
1879             std::swap(pos[0], pos[1]);
1880 
1881         int ctxIdx = bIsLuma ? (3 * (log2TrSize - 2) + (log2TrSize == 5)) : NUM_CTX_LAST_FLAG_XY_LUMA;
1882         int ctxShift = (bIsLuma ? (log2TrSize > 2) : (log2TrSize - 2));
1883         uint32_t maxGroupIdx = (log2TrSize << 1) - 1;
1884         X265_CHECK(((log2TrSize - 1) >> 2) == (uint32_t)(log2TrSize == 5), "ctxIdx check failure\n");
1885         X265_CHECK((uint32_t)ctxShift == (bIsLuma ? ((log2TrSize + 1) >> 2) : log2TrSize - 2), "ctxShift check failure\n");
1886 
1887         uint8_t *ctx = &m_contextState[OFF_CTX_LAST_FLAG_X];
1888         for (uint32_t i = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
1889         {
1890             uint32_t temp = g_lastCoeffTable[pos[i]];
1891             uint32_t prefixOnes = temp & 15;
1892             uint32_t suffixLen = temp >> 4;
1893 
1894             for (uint32_t ctxLast = 0; ctxLast < prefixOnes; ctxLast++)
1895                 encodeBin(1, *(ctx + ctxIdx + (ctxLast >> ctxShift)));
1896 
1897             if (prefixOnes < maxGroupIdx)
1898                 encodeBin(0, *(ctx + ctxIdx + (prefixOnes >> ctxShift)));
1899 
1900             packedSuffixBits <<= suffixLen;
1901             packedSuffixBits |= (pos[i] & ((1 << suffixLen) - 1));
1902             packedSuffixLen += suffixLen;
1903         }
1904 
1905         encodeBinsEP(packedSuffixBits, packedSuffixLen);
1906     }
1907 
1908     // code significance flag
1909     uint8_t * const baseCoeffGroupCtx = &m_contextState[OFF_SIG_CG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX)];
1910     uint8_t * const baseCtx = bIsLuma ? &m_contextState[OFF_SIG_FLAG_CTX] : &m_contextState[OFF_SIG_FLAG_CTX + NUM_SIG_FLAG_CTX_LUMA];
1911     uint32_t c1 = 1;
1912     int scanPosSigOff = scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1;
1913     ALIGN_VAR_32(uint16_t, absCoeff[(1 << MLS_CG_SIZE) + 1]);   // extra 2 bytes(+1) space for AVX2 assembly, +1 because (numNonZero<=1) in costCoeffNxN path
1914     uint32_t numNonZero = 1;
1915     unsigned long lastNZPosInCG;
1916     unsigned long firstNZPosInCG;
1917 
1918 #if _DEBUG
1919     // Unnecessary, for Valgrind-3.10.0 only
1920     memset(absCoeff, 0, sizeof(absCoeff));
1921 #endif
1922 
1923     absCoeff[0] = (uint16_t)abs(coeff[posLast]);
1924 
1925     for (int subSet = lastScanSet; subSet >= 0; subSet--)
1926     {
1927         const uint32_t subCoeffFlag = coeffFlag[subSet];
1928         uint32_t scanFlagMask = subCoeffFlag;
1929         int subPosBase = subSet << MLS_CG_SIZE;
1930 
1931         if (subSet == lastScanSet)
1932         {
1933             X265_CHECK(scanPosSigOff == scanPosLast - (lastScanSet << MLS_CG_SIZE) - 1, "scanPos mistake\n");
1934             scanFlagMask >>= 1;
1935         }
1936 
1937         // encode significant_coeffgroup_flag
1938         const int cgBlkPos = codingParameters.scanCG[subSet];
1939         const int cgPosY   = (uint32_t)cgBlkPos >> (log2TrSize - MLS_CG_LOG2_SIZE);
1940         const int cgPosX   = cgBlkPos & ((1 << (log2TrSize - MLS_CG_LOG2_SIZE)) - 1);
1941         const uint64_t cgBlkPosMask = ((uint64_t)1 << cgBlkPos);
1942 
1943         if (subSet == lastScanSet || !subSet)
1944             sigCoeffGroupFlag64 |= cgBlkPosMask;
1945         else
1946         {
1947             uint32_t sigCoeffGroup = ((sigCoeffGroupFlag64 & cgBlkPosMask) != 0);
1948             uint32_t ctxSig = Quant::getSigCoeffGroupCtxInc(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
1949             encodeBin(sigCoeffGroup, baseCoeffGroupCtx[ctxSig]);
1950         }
1951 
1952         // encode significant_coeff_flag
1953         if ((scanPosSigOff >= 0) && (sigCoeffGroupFlag64 & cgBlkPosMask))
1954         {
1955             X265_CHECK((log2TrSize != 2) || (log2TrSize == 2 && subSet == 0), "log2TrSize and subSet mistake!\n");
1956             const int patternSigCtx = Quant::calcPatternSigCtx(sigCoeffGroupFlag64, cgPosX, cgPosY, cgBlkPos, (trSize >> MLS_CG_LOG2_SIZE));
1957             const uint32_t posOffset = (bIsLuma && subSet) ? 3 : 0;
1958 
1959             // NOTE: [patternSigCtx][posXinSubset][posYinSubset]
1960             static const uint8_t table_cnt[5][SCAN_SET_SIZE] =
1961             {
1962                 // patternSigCtx = 0
1963                 {
1964                     2, 1, 1, 0,
1965                     1, 1, 0, 0,
1966                     1, 0, 0, 0,
1967                     0, 0, 0, 0,
1968                 },
1969                 // patternSigCtx = 1
1970                 {
1971                     2, 2, 2, 2,
1972                     1, 1, 1, 1,
1973                     0, 0, 0, 0,
1974                     0, 0, 0, 0,
1975                 },
1976                 // patternSigCtx = 2
1977                 {
1978                     2, 1, 0, 0,
1979                     2, 1, 0, 0,
1980                     2, 1, 0, 0,
1981                     2, 1, 0, 0,
1982                 },
1983                 // patternSigCtx = 3
1984                 {
1985                     2, 2, 2, 2,
1986                     2, 2, 2, 2,
1987                     2, 2, 2, 2,
1988                     2, 2, 2, 2,
1989                 },
1990                 // 4x4
1991                 {
1992                     0, 1, 4, 5,
1993                     2, 3, 4, 5,
1994                     6, 6, 8, 8,
1995                     7, 7, 8, 8
1996                 }
1997             };
1998 
1999             const int offset = codingParameters.firstSignificanceMapContext;
2000             const uint32_t blkPosBase  = codingParameters.scan[subPosBase];
2001 
2002             X265_CHECK(scanPosSigOff >= 0, "scanPosSigOff check failure\n");
2003             if (m_bitIf)
2004             {
2005                 ALIGN_VAR_32(uint16_t, tmpCoeff[SCAN_SET_SIZE]);
2006 
2007                 // TODO: accelerate by PABSW
2008                 for (int i = 0; i < MLS_CG_SIZE; i++)
2009                 {
2010                     tmpCoeff[i * MLS_CG_SIZE + 0] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 0]);
2011                     tmpCoeff[i * MLS_CG_SIZE + 1] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 1]);
2012                     tmpCoeff[i * MLS_CG_SIZE + 2] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 2]);
2013                     tmpCoeff[i * MLS_CG_SIZE + 3] = (uint16_t)abs(coeff[blkPosBase + i * trSize + 3]);
2014                 }
2015 
2016                 if (log2TrSize == 2)
2017                 {
2018                     do
2019                     {
2020                         uint32_t blkPos, sig, ctxSig;
2021                         blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2022                         sig     = scanFlagMask & 1;
2023                         scanFlagMask >>= 1;
2024                         X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2025                         {
2026                             ctxSig = table_cnt[4][blkPos];
2027                             X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, blkPos, bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2028                             encodeBin(sig, baseCtx[ctxSig]);
2029                         }
2030                         absCoeff[numNonZero] = tmpCoeff[blkPos];
2031                         numNonZero += sig;
2032                         scanPosSigOff--;
2033                     }
2034                     while(scanPosSigOff >= 0);
2035                 }
2036                 else
2037                 {
2038                     X265_CHECK((log2TrSize > 2), "log2TrSize must be more than 2 in this path!\n");
2039 
2040                     const uint8_t *tabSigCtx = table_cnt[(uint32_t)patternSigCtx];
2041                     do
2042                     {
2043                         uint32_t blkPos, sig, ctxSig;
2044                         blkPos = g_scan4x4[codingParameters.scanType][scanPosSigOff];
2045                         const uint32_t posZeroMask = (subPosBase + scanPosSigOff) ? ~0 : 0;
2046                         sig     = scanFlagMask & 1;
2047                         scanFlagMask >>= 1;
2048                         X265_CHECK((uint32_t)(tmpCoeff[blkPos] != 0) == sig, "sign bit mistake\n");
2049                         if (scanPosSigOff != 0 || subSet == 0 || numNonZero)
2050                         {
2051                             const uint32_t cnt = tabSigCtx[blkPos] + offset;
2052                             ctxSig = (cnt + posOffset) & posZeroMask;
2053 
2054                             X265_CHECK(ctxSig == Quant::getSigCtxInc(patternSigCtx, log2TrSize, trSize, codingParameters.scan[subPosBase + scanPosSigOff], bIsLuma, codingParameters.firstSignificanceMapContext), "sigCtx mistake!\n");;
2055                             encodeBin(sig, baseCtx[ctxSig]);
2056                         }
2057                         absCoeff[numNonZero] = tmpCoeff[blkPos];
2058                         numNonZero += sig;
2059                         scanPosSigOff--;
2060                     }
2061                     while(scanPosSigOff >= 0);
2062                 }
2063             }
2064             else // fast RD path
2065             {
2066                 // maximum g_entropyBits are 18-bits and maximum of count are 16, so intermedia of sum are 22-bits
2067                 const uint8_t *tabSigCtx = table_cnt[(log2TrSize == 2) ? 4 : (uint32_t)patternSigCtx];
2068                 X265_CHECK(numNonZero <= 1, "numNonZero check failure");
2069                 uint32_t sum = primitives.costCoeffNxN(g_scan4x4[codingParameters.scanType], &coeff[blkPosBase], (intptr_t)trSize, absCoeff + numNonZero, tabSigCtx, scanFlagMask, baseCtx, offset + posOffset, scanPosSigOff, subPosBase);
2070 
2071 #if CHECKED_BUILD || _DEBUG
2072                 numNonZero = coeffNum[subSet];
2073 #endif
2074                 // update RD cost
2075                 m_fracBits += sum;
2076             } // end of fast RD path -- !m_bitIf
2077         }
2078         X265_CHECK(coeffNum[subSet] == numNonZero, "coefNum mistake\n");
2079 
2080         uint32_t coeffSigns = coeffSign[subSet];
2081         numNonZero = coeffNum[subSet];
2082         if (numNonZero > 0)
2083         {
2084             uint32_t idx;
2085             X265_CHECK(subCoeffFlag > 0, "subCoeffFlag is zero\n");
2086             CLZ(lastNZPosInCG, subCoeffFlag);
2087             CTZ(firstNZPosInCG, subCoeffFlag);
2088 
2089             bool signHidden = (lastNZPosInCG - firstNZPosInCG >= SBH_THRESHOLD);
2090             const uint8_t ctxSet = (((subSet > 0) + bIsLuma) & 2) + !(c1 & 3);
2091             X265_CHECK((((subSet > 0) & bIsLuma) ? 2 : 0) + !(c1 & 3) == ctxSet, "ctxSet check failure\n");
2092 
2093             c1 = 1;
2094             uint8_t *baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ONE_FLAG_CTX_LUMA) + OFF_ONE_FLAG_CTX + 4 * ctxSet];
2095 
2096             uint32_t numC1Flag = X265_MIN(numNonZero, C1FLAG_NUMBER);
2097             X265_CHECK(numC1Flag > 0, "numC1Flag check failure\n");
2098 
2099             if (!m_bitIf)
2100             {
2101                 uint32_t sum = primitives.costC1C2Flag(absCoeff, numC1Flag, baseCtxMod, (bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA - NUM_ONE_FLAG_CTX_LUMA) + (OFF_ABS_FLAG_CTX - OFF_ONE_FLAG_CTX) - 3 * ctxSet);
2102                 uint32_t firstC2Idx = (sum >> 28);
2103                 c1 = ((sum >> 26) & 3);
2104                 m_fracBits += sum & 0x00FFFFFF;
2105 
2106                 const int hiddenShift = (bHideFirstSign & signHidden) ? -1 : 0;
2107                 //encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2108                 m_fracBits += (numNonZero + hiddenShift) << 15;
2109 
2110                 if (numNonZero > firstC2Idx)
2111                 {
2112                     sum = primitives.costCoeffRemain(absCoeff, numNonZero, firstC2Idx);
2113                     X265_CHECK(sum == costCoeffRemain_c0(absCoeff, numNonZero), "costCoeffRemain check failure\n");
2114                     m_fracBits += ((uint64_t)sum << 15);
2115                 }
2116             }
2117             // Standard path
2118             else
2119             {
2120                 uint32_t firstC2Idx = 8;
2121                 uint32_t firstC2Flag = 2;
2122                 uint32_t c1Next = 0xFFFFFFFE;
2123 
2124                 idx = 0;
2125                 do
2126                 {
2127                     const uint32_t symbol1 = absCoeff[idx] > 1;
2128                     const uint32_t symbol2 = absCoeff[idx] > 2;
2129                     encodeBin(symbol1, baseCtxMod[c1]);
2130 
2131                     if (symbol1)
2132                         c1Next = 0;
2133 
2134                     firstC2Flag = (symbol1 + firstC2Flag == 3) ? symbol2 : firstC2Flag;
2135                     firstC2Idx  = (symbol1 + firstC2Idx == 9) ? idx : firstC2Idx;
2136 
2137                     c1 = (c1Next & 3);
2138                     c1Next >>= 2;
2139                     X265_CHECK(c1 <= 3, "c1 check failure\n");
2140                     idx++;
2141                 }
2142                 while(idx < numC1Flag);
2143 
2144                 if (!c1)
2145                 {
2146                     baseCtxMod = &m_contextState[(bIsLuma ? 0 : NUM_ABS_FLAG_CTX_LUMA) + OFF_ABS_FLAG_CTX + ctxSet];
2147 
2148                     X265_CHECK((firstC2Flag <= 1), "firstC2FlagIdx check failure\n");
2149                     encodeBin(firstC2Flag, baseCtxMod[0]);
2150                 }
2151 
2152                 const int hiddenShift = (bHideFirstSign && signHidden) ? 1 : 0;
2153                 encodeBinsEP((coeffSigns >> hiddenShift), numNonZero - hiddenShift);
2154 
2155                 if (!c1 || numNonZero > C1FLAG_NUMBER)
2156                 {
2157                     // Standard path
2158                     uint32_t goRiceParam = 0;
2159                     int baseLevel = 3;
2160                     uint32_t threshold = COEF_REMAIN_BIN_REDUCTION;
2161 #if CHECKED_BUILD || _DEBUG
2162                     int firstCoeff2 = 1;
2163 #endif
2164                     idx = firstC2Idx;
2165                     do
2166                     {
2167                         if (idx >= C1FLAG_NUMBER)
2168                             baseLevel = 1;
2169                         // TODO: fast algorithm maybe broken this check logic
2170                         X265_CHECK(baseLevel == ((idx < C1FLAG_NUMBER) ? (2 + firstCoeff2) : 1), "baseLevel check failurr\n");
2171 
2172                         if (absCoeff[idx] >= baseLevel)
2173                         {
2174                             writeCoefRemainExGolomb(absCoeff[idx] - baseLevel, goRiceParam);
2175                             X265_CHECK(threshold == (uint32_t)(COEF_REMAIN_BIN_REDUCTION << goRiceParam), "COEF_REMAIN_BIN_REDUCTION check failure\n");
2176                             const int adjust = (absCoeff[idx] > threshold) & (goRiceParam <= 3);
2177                             goRiceParam += adjust;
2178                             threshold += (adjust) ? threshold : 0;
2179                             X265_CHECK(goRiceParam <= 4, "goRiceParam check failure\n");
2180                         }
2181 #if CHECKED_BUILD || _DEBUG
2182                         firstCoeff2 = 0;
2183 #endif
2184                         baseLevel = 2;
2185                         idx++;
2186                     }
2187                     while(idx < numNonZero);
2188                 }
2189             } // end of !bitIf
2190         } // end of (numNonZero > 0)
2191 
2192         // Initialize value for next loop
2193         numNonZero = 0;
2194         scanPosSigOff = (1 << MLS_CG_SIZE) - 1;
2195     }
2196 }
2197 
codeSaoMaxUvlc(uint32_t code,uint32_t maxSymbol)2198 void Entropy::codeSaoMaxUvlc(uint32_t code, uint32_t maxSymbol)
2199 {
2200     X265_CHECK(maxSymbol > 0, "maxSymbol too small\n");
2201 
2202     uint32_t isCodeNonZero = !!code;
2203 
2204     encodeBinEP(isCodeNonZero);
2205     if (isCodeNonZero)
2206     {
2207         uint32_t isCodeLast = (maxSymbol > code);
2208         uint32_t mask = (1 << (code - 1)) - 1;
2209         uint32_t len = code - 1 + isCodeLast;
2210         mask <<= isCodeLast;
2211 
2212         encodeBinsEP(mask, len);
2213     }
2214 }
2215 
2216 /* estimate bit cost for CBP, significant map and significant coefficients */
estBit(EstBitsSbac & estBitsSbac,uint32_t log2TrSize,bool bIsLuma) const2217 void Entropy::estBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
2218 {
2219     estCBFBit(estBitsSbac);
2220 
2221     estSignificantCoeffGroupMapBit(estBitsSbac, bIsLuma);
2222 
2223     // encode significance map
2224     estSignificantMapBit(estBitsSbac, log2TrSize, bIsLuma);
2225 
2226     // encode significant coefficients
2227     estSignificantCoefficientsBit(estBitsSbac, bIsLuma);
2228 }
2229 
2230 /* estimate bit cost for each CBP bit */
estCBFBit(EstBitsSbac & estBitsSbac) const2231 void Entropy::estCBFBit(EstBitsSbac& estBitsSbac) const
2232 {
2233     const uint8_t *ctx = &m_contextState[OFF_QT_CBF_CTX];
2234 
2235     for (uint32_t ctxInc = 0; ctxInc < NUM_QT_CBF_CTX; ctxInc++)
2236     {
2237         estBitsSbac.blockCbpBits[ctxInc][0] = sbacGetEntropyBits(ctx[ctxInc], 0);
2238         estBitsSbac.blockCbpBits[ctxInc][1] = sbacGetEntropyBits(ctx[ctxInc], 1);
2239     }
2240 
2241     ctx = &m_contextState[OFF_QT_ROOT_CBF_CTX];
2242 
2243     estBitsSbac.blockRootCbpBits[0] = sbacGetEntropyBits(ctx[0], 0);
2244     estBitsSbac.blockRootCbpBits[1] = sbacGetEntropyBits(ctx[0], 1);
2245 }
2246 
2247 /* estimate SAMBAC bit cost for significant coefficient group map */
estSignificantCoeffGroupMapBit(EstBitsSbac & estBitsSbac,bool bIsLuma) const2248 void Entropy::estSignificantCoeffGroupMapBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2249 {
2250     int firstCtx = 0, numCtx = NUM_SIG_CG_FLAG_CTX;
2251 
2252     for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
2253         for (uint32_t bin = 0; bin < 2; bin++)
2254             estBitsSbac.significantCoeffGroupBits[ctxIdx][bin] = sbacGetEntropyBits(m_contextState[OFF_SIG_CG_FLAG_CTX + ((bIsLuma ? 0 : NUM_SIG_CG_FLAG_CTX) + ctxIdx)], bin);
2255 }
2256 
/* Estimate bit costs for the significant coefficient map and for the
 * last-significant-position prefixes.
 *
 * Fills estBitsSbac.significantBits[bin][ctx] for context 0 plus the range of
 * contexts selected by log2TrSize / bIsLuma, and estBitsSbac.lastBits[i][g]
 * for i = 0,1 and g = 0..maxGroupIdx (i selects the context set at
 * OFF_CTX_LAST_FLAG_X + i * NUM_CTX_LAST_FLAG_XY; presumably X then Y). */
void Entropy::estSignificantMapBit(EstBitsSbac& estBitsSbac, uint32_t log2TrSize, bool bIsLuma) const
{
    // default range (used when log2TrSize < 3): contexts 1..8
    int firstCtx = 1, numCtx = 8;

    if (log2TrSize >= 4)
    {
        firstCtx = bIsLuma ? 21 : 12;
        numCtx = bIsLuma ? 6 : 3;
    }
    else if (log2TrSize == 3)
    {
        firstCtx = 9;
        numCtx = bIsLuma ? 12 : 3;
    }

    // chroma significance contexts follow the luma ones
    const int ctxSigOffset = OFF_SIG_FLAG_CTX + (bIsLuma ? 0 : NUM_SIG_FLAG_CTX_LUMA);

    // context 0 is always tabulated, regardless of the selected range
    estBitsSbac.significantBits[0][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 0);
    estBitsSbac.significantBits[1][0] = sbacGetEntropyBits(m_contextState[ctxSigOffset], 1);

    for (int ctxIdx = firstCtx; ctxIdx < firstCtx + numCtx; ctxIdx++)
    {
        estBitsSbac.significantBits[0][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 0);
        estBitsSbac.significantBits[1][ctxIdx] = sbacGetEntropyBits(m_contextState[ctxSigOffset + ctxIdx], 1);
    }

    // lastBits[i][g] is the cost of g one-bins followed by a zero-bin; the
    // final entry (g == maxGroupIdx) has no terminating zero-bin.
    const uint32_t maxGroupIdx = log2TrSize * 2 - 1;
    if (bIsLuma)
    {
        if (log2TrSize == 2)
        {
            // smallest luma size: one context per bin, three contexts in total
            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
            {
                int bits = 0; // accumulated cost of the one-bins coded so far
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];

                for (uint32_t ctx = 0; ctx < 3; ctx++)
                {
                    estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctx], 0);
                    bits += sbacGetEntropyBits(ctxState[ctx], 1);
                }

                estBitsSbac.lastBits[i][maxGroupIdx] = bits;
            }
        }
        else
        {
            // first context used by this transform size within the luma set
            const int blkSizeOffset = ((log2TrSize - 2) * 3 + (log2TrSize == 5));

            for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
            {
                int bits = 0; // accumulated cost of the one-bins coded so far
                const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];
                X265_CHECK(maxGroupIdx & 1, "maxGroupIdx check failure\n");

                // each context is shared by two consecutive bins, so every
                // iteration produces two table entries (2*ctx and 2*ctx + 1)
                for (uint32_t ctx = 0; ctx < (maxGroupIdx >> 1) + 1; ctx++)
                {
                    const int cost0 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 0);
                    const int cost1 = sbacGetEntropyBits(ctxState[blkSizeOffset + ctx], 1);
                    estBitsSbac.lastBits[i][ctx * 2 + 0] = bits + cost0;
                    estBitsSbac.lastBits[i][ctx * 2 + 1] = bits + cost1 + cost0;
                    bits += 2 * cost1;
                }
                // the loop added a terminating zero-bin (cost0) to the last entry
                // as well; remove it because the final group has no terminator
                estBitsSbac.lastBits[i][maxGroupIdx] -= sbacGetEntropyBits(ctxState[blkSizeOffset + (maxGroupIdx >> 1)], 0);
            }
        }
    }
    else
    {
        // chroma contexts start after the luma ones; (1 << ctxShift)
        // consecutive bins share one context
        const int blkSizeOffset = NUM_CTX_LAST_FLAG_XY_LUMA;
        const int ctxShift = log2TrSize - 2;

        for (int i = 0, ctxIdx = 0; i < 2; i++, ctxIdx += NUM_CTX_LAST_FLAG_XY)
        {
            int bits = 0; // accumulated cost of the one-bins coded so far
            const uint8_t *ctxState = &m_contextState[OFF_CTX_LAST_FLAG_X + ctxIdx];

            for (uint32_t ctx = 0; ctx < maxGroupIdx; ctx++)
            {
                int ctxOffset = blkSizeOffset + (ctx >> ctxShift);
                estBitsSbac.lastBits[i][ctx] = bits + sbacGetEntropyBits(ctxState[ctxOffset], 0);
                bits += sbacGetEntropyBits(ctxState[ctxOffset], 1);
            }

            estBitsSbac.lastBits[i][maxGroupIdx] = bits;
        }
    }
}
2347 
2348 /* estimate bit cost of significant coefficient */
estSignificantCoefficientsBit(EstBitsSbac & estBitsSbac,bool bIsLuma) const2349 void Entropy::estSignificantCoefficientsBit(EstBitsSbac& estBitsSbac, bool bIsLuma) const
2350 {
2351     if (bIsLuma)
2352     {
2353         const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX];
2354         const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX];
2355 
2356         for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_LUMA; ctxIdx++)
2357         {
2358             estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2359             estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2360         }
2361 
2362         for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_LUMA; ctxIdx++)
2363         {
2364             estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2365             estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2366         }
2367     }
2368     else
2369     {
2370         const uint8_t *ctxOne = &m_contextState[OFF_ONE_FLAG_CTX + NUM_ONE_FLAG_CTX_LUMA];
2371         const uint8_t *ctxAbs = &m_contextState[OFF_ABS_FLAG_CTX + NUM_ABS_FLAG_CTX_LUMA];
2372 
2373         for (int ctxIdx = 0; ctxIdx < NUM_ONE_FLAG_CTX_CHROMA; ctxIdx++)
2374         {
2375             estBitsSbac.greaterOneBits[ctxIdx][0] = sbacGetEntropyBits(ctxOne[ctxIdx], 0);
2376             estBitsSbac.greaterOneBits[ctxIdx][1] = sbacGetEntropyBits(ctxOne[ctxIdx], 1);
2377         }
2378 
2379         for (int ctxIdx = 0; ctxIdx < NUM_ABS_FLAG_CTX_CHROMA; ctxIdx++)
2380         {
2381             estBitsSbac.levelAbsBits[ctxIdx][0] = sbacGetEntropyBits(ctxAbs[ctxIdx], 0);
2382             estBitsSbac.levelAbsBits[ctxIdx][1] = sbacGetEntropyBits(ctxAbs[ctxIdx], 1);
2383         }
2384     }
2385 }
2386 
2387 /* Initialize our context information from the nominated source */
copyContextsFrom(const Entropy & src)2388 void Entropy::copyContextsFrom(const Entropy& src)
2389 {
2390     X265_CHECK(src.m_valid, "invalid copy source context\n");
2391 
2392     memcpy(m_contextState, src.m_contextState, MAX_OFF_CTX_MOD * sizeof(m_contextState[0]));
2393     markValid();
2394 }
2395 
start()2396 void Entropy::start()
2397 {
2398     m_low = 0;
2399     m_range = 510;
2400     m_bitsLeft = -12;
2401     m_numBufferedBytes = 0;
2402     m_bufferedByte = 0xff;
2403 }
2404 
/* Flush the arithmetic coder: emit the bytes still held back in
 * m_bufferedByte / m_numBufferedBytes (resolving a pending carry out of
 * m_low if there is one) and then the remaining bits of m_low. */
void Entropy::finish()
{
    // any bit at or above position (21 + m_bitsLeft) is a carry out of m_low
    if (m_low >> (21 + m_bitsLeft))
    {
        // propagate the carry: the buffered byte is incremented and each
        // further buffered byte is written as 0x00.
        // NOTE(review): this write is not guarded by m_numBufferedBytes > 0,
        // unlike the no-carry branch below.
        m_bitIf->writeByte(m_bufferedByte + 1);
        while (m_numBufferedBytes > 1)
        {
            m_bitIf->writeByte(0x00);
            m_numBufferedBytes--;
        }

        // remove the carry bit that has just been written out
        m_low -= 1 << (21 + m_bitsLeft);
    }
    else
    {
        // no carry: the buffered byte goes out unchanged and each further
        // buffered byte is written as 0xff
        if (m_numBufferedBytes > 0)
            m_bitIf->writeByte(m_bufferedByte);

        while (m_numBufferedBytes > 1)
        {
            m_bitIf->writeByte(0xff);
            m_numBufferedBytes--;
        }
    }
    // finally emit (13 + m_bitsLeft) bits taken from m_low above its low 8 bits
    m_bitIf->write(m_low >> 8, 13 + m_bitsLeft);
}
2431 
copyState(const Entropy & other)2432 void Entropy::copyState(const Entropy& other)
2433 {
2434     m_low = other.m_low;
2435     m_range = other.m_range;
2436     m_bitsLeft = other.m_bitsLeft;
2437     m_bufferedByte = other.m_bufferedByte;
2438     m_numBufferedBytes = other.m_numBufferedBytes;
2439     m_fracBits = other.m_fracBits;
2440 }
2441 
resetBits()2442 void Entropy::resetBits()
2443 {
2444     m_low = 0;
2445     m_bitsLeft = -12;
2446     m_numBufferedBytes = 0;
2447     m_bufferedByte = 0xff;
2448     m_fracBits &= 32767;
2449     if (m_bitIf)
2450         m_bitIf->resetBits();
2451 }
2452 
/** Encode one context-coded bin.
 *
 * The context model is always advanced via sbacNext(). When no bitstream
 * interface is attached (m_bitIf is NULL) only the estimated cost is added to
 * m_fracBits; otherwise the low/range registers are updated and renormalised,
 * and writeOut() is called once enough bits have accumulated.
 *
 * \param binValue  bin to code; only its lowest bit is compared with the MPS
 * \param ctxModel  context state, updated in place */
void Entropy::encodeBin(uint32_t binValue, uint8_t &ctxModel)
{
    // remember the state before the update; it drives both costing and coding
    uint32_t mstate = ctxModel;

    ctxModel = sbacNext(mstate, binValue);

    // estimation mode: accumulate the cost and leave the coder registers alone
    if (!m_bitIf)
    {
        m_fracBits += sbacGetEntropyBits(mstate, binValue);
        return;
    }

    uint32_t range = m_range;
    uint32_t state = sbacGetState(mstate);
    // LPS sub-range looked up from the state and bits 6..7 of the range
    uint32_t lps = g_lpsTable[state][((uint8_t)range >> 6)];
    range -= lps;

    X265_CHECK(lps >= 2, "lps is too small\n");

    // MPS path shift: the unsigned subtraction wraps when range < 256, which
    // sets bit 31, so this yields 1 when a one-bit renormalisation is needed
    int numBits = (uint32_t)(range - 256) >> 31;
    uint32_t low = m_low;

    // NOTE: MPS must be LOWEST bit in mstate
    X265_CHECK((uint32_t)((binValue ^ mstate) & 1) == (uint32_t)(binValue != sbacGetMps(mstate)), "binValue failure\n");
    if ((binValue ^ mstate) & 1)
    {
        // LPS path: the interval becomes the LPS sub-range above the MPS part
        // NOTE: lps is non-zero and the maximum of idx is 8 because lps less than 256
        //numBits = g_renormTable[lps >> 3];
        // NOTE(review): CLZ presumably stores the index of the highest set
        // bit of lps in idx -- confirm against its definition
        unsigned long idx;
        CLZ(idx, lps);
        X265_CHECK(state != 63 || idx == 1, "state failure\n");

        numBits = 8 - idx;
        // states 63 and above always use a fixed shift of 6
        if (state >= 63)
            numBits = 6;
        X265_CHECK(numBits <= 6, "numBits failure\n");

        low += range;
        range = lps;
    }
    // renormalise both registers by the same shift and account for the new bits
    m_low = (low << numBits);
    m_range = (range << numBits);
    m_bitsLeft += numBits;

    // a non-negative m_bitsLeft means output is ready to be moved out
    if (m_bitsLeft >= 0)
        writeOut();
}
2501 
2502 /** Encode equiprobable bin */
encodeBinEP(uint32_t binValue)2503 void Entropy::encodeBinEP(uint32_t binValue)
2504 {
2505     if (!m_bitIf)
2506     {
2507         m_fracBits += 32768;
2508         return;
2509     }
2510     m_low <<= 1;
2511     if (binValue)
2512         m_low += m_range;
2513     m_bitsLeft++;
2514 
2515     if (m_bitsLeft >= 0)
2516         writeOut();
2517 }
2518 
/** Encode several equiprobable (bypass) bins at once.
 *
 * \param binValues  the bins, packed in the low numBins bits; higher-order
 *                   bits are consumed first
 * \param numBins    number of bins to code
 *
 * Without a bitstream interface only the cost (32768 per bin) is added to
 * m_fracBits. */
void Entropy::encodeBinsEP(uint32_t binValues, int numBins)
{
    if (!m_bitIf)
    {
        m_fracBits += 32768 * numBins;
        return;
    }

    // consume the bins in chunks of 8, most significant chunk first, until
    // at most 8 remain
    while (numBins > 8)
    {
        numBins -= 8;
        // top 8 of the remaining bins
        uint32_t pattern = binValues >> numBins;
        m_low <<= 8;
        m_low += m_range * pattern;
        // drop the bins that were just coded
        binValues -= pattern << numBins;
        m_bitsLeft += 8;

        if (m_bitsLeft >= 0)
            writeOut();
    }

    // remaining bins (whatever the loop left, at most 8)
    m_low <<= numBins;
    m_low += m_range * binValues;
    m_bitsLeft += numBins;

    if (m_bitsLeft >= 0)
        writeOut();
}
2548 
2549 /** Encode terminating bin */
encodeBinTrm(uint32_t binValue)2550 void Entropy::encodeBinTrm(uint32_t binValue)
2551 {
2552     if (!m_bitIf)
2553     {
2554         m_fracBits += sbacGetEntropyBitsTrm(binValue);
2555         return;
2556     }
2557 
2558     m_range -= 2;
2559     if (binValue)
2560     {
2561         m_low += m_range;
2562         m_low <<= 7;
2563         m_range = 2 << 7;
2564         m_bitsLeft += 7;
2565     }
2566     else if (m_range >= 256)
2567         return;
2568     else
2569     {
2570         m_low <<= 1;
2571         m_range <<= 1;
2572         m_bitsLeft++;
2573     }
2574 
2575     if (m_bitsLeft >= 0)
2576         writeOut();
2577 }
2578 
/** Move bits from register into bitstream.
 *
 * Extracts the leading byte (plus a possible carry in bit 8) from m_low.
 * A byte equal to 0xff is only counted, since a later carry could still
 * change it; any other value flushes the previously buffered bytes with the
 * carry applied and becomes the new buffered byte. */
void Entropy::writeOut()
{
    // bits of m_low from position (13 + m_bitsLeft) upwards: byte plus carry
    uint32_t leadByte = m_low >> (13 + m_bitsLeft);
    // mask that keeps only the bits of m_low below the extracted ones
    uint32_t low_mask = (uint32_t)(~0) >> (11 + 8 - m_bitsLeft);

    m_bitsLeft -= 8;
    m_low &= low_mask;

    if (leadByte == 0xff)
        // defer: just extend the run of outstanding bytes
        m_numBufferedBytes++;
    else
    {
        uint32_t numBufferedBytes = m_numBufferedBytes;
        if (numBufferedBytes > 0)
        {
            // the carry (bit 8 of leadByte) is added to the buffered byte
            uint32_t carry = leadByte >> 8;
            uint32_t byteTowrite = m_bufferedByte + carry;
            m_bitIf->writeByte(byteTowrite);

            // the remaining buffered bytes stay 0xff without a carry and
            // become 0x00 with one
            byteTowrite = (0xff + carry) & 0xff;
            while (numBufferedBytes > 1)
            {
                m_bitIf->writeByte(byteTowrite);
                numBufferedBytes--;
            }
        }
        // the new byte (carry bit dropped by the cast) is now the only buffered one
        m_numBufferedBytes = 1;
        m_bufferedByte = (uint8_t)leadByte;
    }
}
2610 
/* Bit-cost table with 128 entries.
 * NOTE(review): presumably indexed by (context state ^ bin) through
 * sbacGetEntropyBits() -- confirm against its definition. The values look
 * like fixed-point costs in which 32768 (0x8000) is one bit, matching the
 * 32768 that encodeBinEP() charges for an equiprobable bin. */
const uint32_t g_entropyBits[128] =
{
    // Corrected table, most notably for last state
    0x07b23, 0x085f9, 0x074a0, 0x08cbc, 0x06ee4, 0x09354, 0x067f4, 0x09c1b, 0x060b0, 0x0a62a, 0x05a9c, 0x0af5b, 0x0548d, 0x0b955, 0x04f56, 0x0c2a9,
    0x04a87, 0x0cbf7, 0x045d6, 0x0d5c3, 0x04144, 0x0e01b, 0x03d88, 0x0e937, 0x039e0, 0x0f2cd, 0x03663, 0x0fc9e, 0x03347, 0x10600, 0x03050, 0x10f95,
    0x02d4d, 0x11a02, 0x02ad3, 0x12333, 0x0286e, 0x12cad, 0x02604, 0x136df, 0x02425, 0x13f48, 0x021f4, 0x149c4, 0x0203e, 0x1527b, 0x01e4d, 0x15d00,
    0x01c99, 0x166de, 0x01b18, 0x17017, 0x019a5, 0x17988, 0x01841, 0x18327, 0x016df, 0x18d50, 0x015d9, 0x19547, 0x0147c, 0x1a083, 0x0138e, 0x1a8a3,
    0x01251, 0x1b418, 0x01166, 0x1bd27, 0x01068, 0x1c77b, 0x00f7f, 0x1d18e, 0x00eda, 0x1d91a, 0x00e19, 0x1e254, 0x00d4f, 0x1ec9a, 0x00c90, 0x1f6e0,
    0x00c01, 0x1fef8, 0x00b5f, 0x208b1, 0x00ab6, 0x21362, 0x00a15, 0x21e46, 0x00988, 0x2285d, 0x00934, 0x22ea8, 0x008a8, 0x239b2, 0x0081d, 0x24577,
    0x007c9, 0x24ce6, 0x00763, 0x25663, 0x00710, 0x25e8f, 0x006a0, 0x26a26, 0x00672, 0x26f23, 0x005e8, 0x27ef8, 0x005ba, 0x284b5, 0x0055e, 0x29057,
    0x0050c, 0x29bab, 0x004c1, 0x2a674, 0x004a7, 0x2aa5e, 0x0046f, 0x2b32f, 0x0041f, 0x2c0ad, 0x003e7, 0x2ca8d, 0x003ba, 0x2d323, 0x0010c, 0x3bfbb
};
2623 
/* Context state transition table: 128 states, two successors each.
 * NOTE(review): presumably g_nextState[state][bin] is the state after coding
 * 'bin' and is what sbacNext() looks up -- confirm against its definition.
 * In the values below the lowest bit of a state acts as its MPS: for most
 * states, coding the bin equal to that bit moves two states up, while the
 * other bin moves to a lower state. The last two states (126 and 127) map to
 * themselves for both bins. */
const uint8_t g_nextState[128][2] =
{
    { 2, 1 }, { 0, 3 }, { 4, 0 }, { 1, 5 }, { 6, 2 }, { 3, 7 }, { 8, 4 }, { 5, 9 },
    { 10, 4 }, { 5, 11 }, { 12, 8 }, { 9, 13 }, { 14, 8 }, { 9, 15 }, { 16, 10 }, { 11, 17 },
    { 18, 12 }, { 13, 19 }, { 20, 14 }, { 15, 21 }, { 22, 16 }, { 17, 23 }, { 24, 18 }, { 19, 25 },
    { 26, 18 }, { 19, 27 }, { 28, 22 }, { 23, 29 }, { 30, 22 }, { 23, 31 }, { 32, 24 }, { 25, 33 },
    { 34, 26 }, { 27, 35 }, { 36, 26 }, { 27, 37 }, { 38, 30 }, { 31, 39 }, { 40, 30 }, { 31, 41 },
    { 42, 32 }, { 33, 43 }, { 44, 32 }, { 33, 45 }, { 46, 36 }, { 37, 47 }, { 48, 36 }, { 37, 49 },
    { 50, 38 }, { 39, 51 }, { 52, 38 }, { 39, 53 }, { 54, 42 }, { 43, 55 }, { 56, 42 }, { 43, 57 },
    { 58, 44 }, { 45, 59 }, { 60, 44 }, { 45, 61 }, { 62, 46 }, { 47, 63 }, { 64, 48 }, { 49, 65 },
    { 66, 48 }, { 49, 67 }, { 68, 50 }, { 51, 69 }, { 70, 52 }, { 53, 71 }, { 72, 52 }, { 53, 73 },
    { 74, 54 }, { 55, 75 }, { 76, 54 }, { 55, 77 }, { 78, 56 }, { 57, 79 }, { 80, 58 }, { 59, 81 },
    { 82, 58 }, { 59, 83 }, { 84, 60 }, { 61, 85 }, { 86, 60 }, { 61, 87 }, { 88, 60 }, { 61, 89 },
    { 90, 62 }, { 63, 91 }, { 92, 64 }, { 65, 93 }, { 94, 64 }, { 65, 95 }, { 96, 66 }, { 67, 97 },
    { 98, 66 }, { 67, 99 }, { 100, 66 }, { 67, 101 }, { 102, 68 }, { 69, 103 }, { 104, 68 }, { 69, 105 },
    { 106, 70 }, { 71, 107 }, { 108, 70 }, { 71, 109 }, { 110, 70 }, { 71, 111 }, { 112, 72 }, { 73, 113 },
    { 114, 72 }, { 73, 115 }, { 116, 72 }, { 73, 117 }, { 118, 74 }, { 75, 119 }, { 120, 74 }, { 75, 121 },
    { 122, 74 }, { 75, 123 }, { 124, 76 }, { 77, 125 }, { 124, 76 }, { 77, 125 }, { 126, 126 }, { 127, 127 }
};
2643 
2644 }
2645 
// [8 24] --> [stateMPS BitCost], [stateLPS BitCost]
// Packed table exported with C linkage under the PFX() name prefix: each
// 32-bit entry holds an 8-bit field in the top byte and a 24-bit bit cost in
// the low bits. In the values below the low 24 bits repeat the corresponding
// g_entropyBits entry.
// NOTE(review): the top byte looks like a next-state value with its lowest bit
// cleared; presumably consumed by assembly primitives -- confirm with the users
// of this symbol.
extern "C" const uint32_t PFX(entropyStateBits)[128] =
{
    // Corrected table, most notably for last state
    0x02007B23, 0x000085F9, 0x040074A0, 0x00008CBC, 0x06006EE4, 0x02009354, 0x080067F4, 0x04009C1B,
    0x0A0060B0, 0x0400A62A, 0x0C005A9C, 0x0800AF5B, 0x0E00548D, 0x0800B955, 0x10004F56, 0x0A00C2A9,
    0x12004A87, 0x0C00CBF7, 0x140045D6, 0x0E00D5C3, 0x16004144, 0x1000E01B, 0x18003D88, 0x1200E937,
    0x1A0039E0, 0x1200F2CD, 0x1C003663, 0x1600FC9E, 0x1E003347, 0x16010600, 0x20003050, 0x18010F95,
    0x22002D4D, 0x1A011A02, 0x24002AD3, 0x1A012333, 0x2600286E, 0x1E012CAD, 0x28002604, 0x1E0136DF,
    0x2A002425, 0x20013F48, 0x2C0021F4, 0x200149C4, 0x2E00203E, 0x2401527B, 0x30001E4D, 0x24015D00,
    0x32001C99, 0x260166DE, 0x34001B18, 0x26017017, 0x360019A5, 0x2A017988, 0x38001841, 0x2A018327,
    0x3A0016DF, 0x2C018D50, 0x3C0015D9, 0x2C019547, 0x3E00147C, 0x2E01A083, 0x4000138E, 0x3001A8A3,
    0x42001251, 0x3001B418, 0x44001166, 0x3201BD27, 0x46001068, 0x3401C77B, 0x48000F7F, 0x3401D18E,
    0x4A000EDA, 0x3601D91A, 0x4C000E19, 0x3601E254, 0x4E000D4F, 0x3801EC9A, 0x50000C90, 0x3A01F6E0,
    0x52000C01, 0x3A01FEF8, 0x54000B5F, 0x3C0208B1, 0x56000AB6, 0x3C021362, 0x58000A15, 0x3C021E46,
    0x5A000988, 0x3E02285D, 0x5C000934, 0x40022EA8, 0x5E0008A8, 0x400239B2, 0x6000081D, 0x42024577,
    0x620007C9, 0x42024CE6, 0x64000763, 0x42025663, 0x66000710, 0x44025E8F, 0x680006A0, 0x44026A26,
    0x6A000672, 0x46026F23, 0x6C0005E8, 0x46027EF8, 0x6E0005BA, 0x460284B5, 0x7000055E, 0x48029057,
    0x7200050C, 0x48029BAB, 0x740004C1, 0x4802A674, 0x760004A7, 0x4A02AA5E, 0x7800046F, 0x4A02B32F,
    0x7A00041F, 0x4A02C0AD, 0x7C0003E7, 0x4C02CA8D, 0x7C0003BA, 0x4C02D323, 0x7E00010C, 0x7E03BFBB,
};
2667 
2668