1 /*
2 * Copyright (c) 2020, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 
23 #include "encode_hevc_header_packer.h"
24 
HevcHeaderPacker()25 HevcHeaderPacker::HevcHeaderPacker()
26 {
27     for (auto it = m_rbsp.begin(); it != m_rbsp.end(); ++it)
28     {
29         *it = 0;
30     }
31 }
32 
GetNaluParams(uint8_t nal_unit_type_in,unsigned short layer_id_in,unsigned short temporal_id,mfxU16 long_start_code)33 MOS_STATUS HevcHeaderPacker::GetNaluParams(uint8_t nal_unit_type_in, unsigned short layer_id_in, unsigned short temporal_id, mfxU16 long_start_code)
34 {
35     m_naluParams.long_start_code       = long_start_code;
36     m_naluParams.nal_unit_type         = static_cast<mfxU16>(nal_unit_type_in);
37     m_naluParams.nuh_layer_id          = layer_id_in;
38     m_naluParams.nuh_temporal_id_plus1 = temporal_id + 1;
39     return MOS_STATUS_SUCCESS;
40 }
41 
GetSPSParams(PCODEC_HEVC_ENCODE_SEQUENCE_PARAMS hevcSeqParams)42 MOS_STATUS HevcHeaderPacker::GetSPSParams(PCODEC_HEVC_ENCODE_SEQUENCE_PARAMS hevcSeqParams)
43 {
44     m_spsParams.log2_min_luma_coding_block_size_minus3   = hevcSeqParams->log2_min_coding_block_size_minus3;
45     m_spsParams.log2_diff_max_min_luma_coding_block_size = hevcSeqParams->log2_max_coding_block_size_minus3 - hevcSeqParams->log2_min_coding_block_size_minus3;
46     m_spsParams.pic_width_in_luma_samples                = (hevcSeqParams->wFrameWidthInMinCbMinus1 + 1) * (1 << (hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
47     m_spsParams.pic_height_in_luma_samples               = (hevcSeqParams->wFrameHeightInMinCbMinus1 + 1) * (1 << (hevcSeqParams->log2_min_coding_block_size_minus3 + 3));
48     m_spsParams.separate_colour_plane_flag               = hevcSeqParams->separate_colour_plane_flag;
49     m_spsParams.num_short_term_ref_pic_sets              = 0;  //NA
50     m_spsParams.num_long_term_ref_pics_sps               = 0;  //NA
51     m_spsParams.log2_max_pic_order_cnt_lsb_minus4        = 0;  //Related to CurrPicOrderCnt?
52     m_spsParams.long_term_ref_pics_present_flag          = 0;  //NA, default in msdk
53     m_spsParams.temporal_mvp_enabled_flag                = hevcSeqParams->sps_temporal_mvp_enable_flag;
54     m_spsParams.sample_adaptive_offset_enabled_flag      = hevcSeqParams->SAO_enabled_flag;
55     for (int i = 0; i < 65; i++)  //NA
56     {
57         m_spsParams.strps[i] = {};
58     }                                                                                          //NA
59     m_spsParams.chroma_format_idc                   = hevcSeqParams->chroma_format_idc;        //NA
60     m_spsParams.high_precision_offsets_enabled_flag = 0;                                       //NA
61     m_spsParams.bit_depth_chroma_minus8             = hevcSeqParams->bit_depth_chroma_minus8;  //NA
62     m_bDssEnabled                                   = hevcSeqParams->SliceSizeControl;
63     return MOS_STATUS_SUCCESS;
64 }
65 
GetPPSParams(PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams)66 MOS_STATUS HevcHeaderPacker::GetPPSParams(PCODEC_HEVC_ENCODE_PICTURE_PARAMS hevcPicParams)
67 {
68     m_ppsParams.dependent_slice_segments_enabled_flag       = hevcPicParams->dependent_slice_segments_enabled_flag;
69     m_ppsParams.num_extra_slice_header_bits                 = 0;
70     m_ppsParams.output_flag_present_flag                    = 0;
71     m_ppsParams.lists_modification_present_flag             = 0;                                    //NA
72     m_ppsParams.cabac_init_present_flag                     = 1;                                    //NA
73     m_ppsParams.weighted_pred_flag                          = hevcPicParams->weighted_pred_flag;    //NA
74     m_ppsParams.weighted_bipred_flag                        = hevcPicParams->weighted_bipred_flag;  //NA
75     m_ppsParams.slice_chroma_qp_offsets_present_flag        = 1;
76     m_ppsParams.deblocking_filter_override_enabled_flag     = 0;                                              //hevcPicParams->deblocking_filter_override_enabled_flag; exist but not transfered from msdk
77     m_ppsParams.loop_filter_across_slices_enabled_flag      = hevcPicParams->loop_filter_across_slices_flag;  //>=CNL,else =0
78     m_ppsParams.tiles_enabled_flag                          = hevcPicParams->tiles_enabled_flag;
79     m_ppsParams.entropy_coding_sync_enabled_flag            = hevcPicParams->entropy_coding_sync_enabled_flag;
80     m_ppsParams.slice_segment_header_extension_present_flag = 0;
81     m_ppsParams.deblocking_filter_disabled_flag             = hevcPicParams->pps_deblocking_filter_disabled_flag;
82     m_ppsParams.beta_offset_div2                            = 0;
83     m_ppsParams.tc_offset_div2                              = 0;
84     m_sliceParams.no_output_of_prior_pics_flag              = hevcPicParams->no_output_of_prior_pics_flag;
85     m_sliceParams.pic_parameter_set_id                      = hevcPicParams->slice_pic_parameter_set_id;
86     m_sliceParams.collocated_ref_idx                        = hevcPicParams->CollocatedRefPicIndex;                                                                     //NA
87     m_sliceParams.pic_order_cnt_lsb                         = hevcPicParams->CurrPicOrderCnt; // need reset poc_lsb later, store cur poc now for later lsb calculation
88     nalType                                                 = hevcPicParams->nal_unit_type;
89     return MOS_STATUS_SUCCESS;
90 }
91 
GetSliceParams(const CODEC_HEVC_ENCODE_SLICE_PARAMS hevcSliceParams)92 MOS_STATUS HevcHeaderPacker::GetSliceParams(const CODEC_HEVC_ENCODE_SLICE_PARAMS hevcSliceParams)
93 {
94     m_sliceParams.first_slice_segment_in_pic_flag = (bool)!hevcSliceParams.slice_segment_address;  //?
95     m_sliceParams.dependent_slice_segment_flag    = hevcSliceParams.dependent_slice_segment_flag;
96     m_sliceParams.segment_address                 = hevcSliceParams.slice_segment_address;
97     m_sliceParams.reserved_flags                  = 0;  //could be got, but seems always 0
98     m_sliceParams.type                            = hevcSliceParams.slice_type;
99     m_sliceParams.pic_output_flag                 = 1;
100     m_sliceParams.colour_plane_id                 = 0;  //pre=0, unused
101     m_sliceParams.short_term_ref_pic_set_sps_flag = 0;  //NA
102     for (int i = 0; i < MAX_NUM_LONG_TERM_PICS; i++)    //NA
103     {
104         m_sliceParams.lt[i] = {};
105     }                                                    //NA
106     m_sliceParams.short_term_ref_pic_set_idx      = 0;   //NA
107     m_sliceParams.strps                           = {};  //NA
108     m_sliceParams.num_long_term_sps               = 0;   //NA
109     m_sliceParams.num_long_term_pics              = 0;   //NA
110     m_sliceParams.temporal_mvp_enabled_flag       = hevcSliceParams.slice_temporal_mvp_enable_flag;
111     m_sliceParams.sao_luma_flag                   = hevcSliceParams.slice_sao_luma_flag;
112     m_sliceParams.sao_chroma_flag                 = hevcSliceParams.slice_sao_chroma_flag;
113     m_sliceParams.collocated_from_l0_flag         = hevcSliceParams.collocated_from_l0_flag;       //NA
114     m_sliceParams.num_ref_idx_l0_active_minus1    = hevcSliceParams.num_ref_idx_l0_active_minus1;  //NA
115     m_sliceParams.num_ref_idx_l1_active_minus1    = hevcSliceParams.num_ref_idx_l1_active_minus1;  //NA
116     //m_sliceParams.ref_pic_list_modification_flag_lx={0};//NA
117     //m_sliceParams.list_entry_lx=0;//NA
118     m_sliceParams.num_ref_idx_active_override_flag       = 1;                                    //NA
119     m_sliceParams.mvd_l1_zero_flag                       = hevcSliceParams.mvd_l1_zero_flag;     //NA
120     m_sliceParams.cabac_init_flag                        = hevcSliceParams.cabac_init_flag;      //NA
121     m_sliceParams.five_minus_max_num_merge_cand          = 5 - hevcSliceParams.MaxNumMergeCand;  //NA
122     m_sliceParams.slice_qp_delta                         = hevcSliceParams.slice_qp_delta;       //transfered and recieved, but wrong data/ Maybe transfered too late?
123     m_sliceParams.slice_cb_qp_offset                     = hevcSliceParams.slice_cb_qp_offset;
124     m_sliceParams.slice_cr_qp_offset                     = hevcSliceParams.slice_cr_qp_offset;
125     m_sliceParams.beta_offset_div2                       = hevcSliceParams.beta_offset_div2;
126     m_sliceParams.tc_offset_div2                         = hevcSliceParams.tc_offset_div2;
127     m_sliceParams.loop_filter_across_slices_enabled_flag = 1;  //>=CNL,else =0
128     m_sliceParams.num_entry_point_offsets                = 0;  //*= !(m_ppsParams.tiles_enabled_flag || m_ppsParams.entropy_coding_sync_enabled_flag);
129     m_sliceParams.luma_log2_weight_denom                 = 0;  //NA
130     m_sliceParams.chroma_log2_weight_denom               = 0;  //NA
131     m_sliceParams.deblocking_filter_disabled_flag        = hevcSliceParams.slice_deblocking_filter_disable_flag;
132     m_sliceParams.deblocking_filter_override_flag        = (m_sliceParams.deblocking_filter_disabled_flag != m_ppsParams.deblocking_filter_disabled_flag);
133     m_sliceParams.deblocking_filter_override_flag |=
134         !m_sliceParams.deblocking_filter_disabled_flag && (m_sliceParams.beta_offset_div2 != m_ppsParams.beta_offset_div2 || m_sliceParams.tc_offset_div2 != m_ppsParams.tc_offset_div2);
135     return MOS_STATUS_SUCCESS;
136 }
137 
PackSSH(BitstreamWriter & bs,HevcNALU const & nalu,HevcSPS const & sps,HevcPPS const & pps,HevcSlice const & slice,bool dyn_slice_size=false)138 void HevcHeaderPacker::PackSSH(
139     BitstreamWriter &bs,
140     HevcNALU const & nalu,
141     HevcSPS const &  sps,
142     HevcPPS const &  pps,
143     HevcSlice const &slice,
144     bool             dyn_slice_size = false)
145 {
146     PackNALU(bs, nalu);
147 
148     if (!dyn_slice_size)
149         PackSSHPartIdAddr(bs, nalu, sps, pps, slice);
150 
151     if (!slice.dependent_slice_segment_flag)
152         PackSSHPartIndependent(bs, nalu, sps, pps, slice);
153 
154     if (pps.tiles_enabled_flag || pps.entropy_coding_sync_enabled_flag)
155     {
156         assert(slice.num_entry_point_offsets == 0);
157 
158         bs.PutUE(slice.num_entry_point_offsets);
159     }
160 
161     assert(0 == pps.slice_segment_header_extension_present_flag);
162 
163     if (!dyn_slice_size)  // no trailing bits for dynamic slice size
164         bs.PutTrailingBits();
165 }
166 
PackNALU(BitstreamWriter & bs,NALU const & h)167 void HevcHeaderPacker::PackNALU(BitstreamWriter &bs, NALU const &h)
168 {
169     bool bLong_SC =
170         h.nal_unit_type == VPS_NUT || h.nal_unit_type == SPS_NUT || h.nal_unit_type == PPS_NUT || h.nal_unit_type == AUD_NUT || h.nal_unit_type == PREFIX_SEI_NUT || h.long_start_code;
171 
172     if (bLong_SC)
173         bs.PutBits(8, 0);  //zero_byte
174 
175     bs.PutBits(24, 0x000001);  //start_code
176 
177     bs.PutBit(0);
178     bs.PutBits(6, h.nal_unit_type);
179     bs.PutBits(6, h.nuh_layer_id);
180     bs.PutBits(3, h.nuh_temporal_id_plus1);
181 }
182 
PackSSHPartIdAddr(BitstreamWriter & bs,NALU const & nalu,SPS const & sps,PPS const & pps,Slice const & slice)183 void HevcHeaderPacker::PackSSHPartIdAddr(
184     BitstreamWriter &bs,
185     NALU const &     nalu,
186     SPS const &      sps,
187     PPS const &      pps,
188     Slice const &    slice)
189 {
190     mfxU32 MaxCU          = (1 << (sps.log2_min_luma_coding_block_size_minus3 + 3 + sps.log2_diff_max_min_luma_coding_block_size));
191     mfxU32 PicSizeInCtbsY = CeilDiv(sps.pic_width_in_luma_samples, MaxCU) * CeilDiv(sps.pic_height_in_luma_samples, MaxCU);
192 
193     bs.PutBit(slice.first_slice_segment_in_pic_flag);
194 
195     bool bIRAP = nalu.nal_unit_type >= BLA_W_LP && nalu.nal_unit_type <= RSV_IRAP_VCL23;
196 
197     if (bIRAP)
198         bs.PutBit(slice.no_output_of_prior_pics_flag);
199 
200     bs.PutUE(slice.pic_parameter_set_id);
201 
202     if (!slice.first_slice_segment_in_pic_flag)
203     {
204         if (pps.dependent_slice_segments_enabled_flag)
205             bs.PutBit(slice.dependent_slice_segment_flag);
206 
207         bs.PutBits(CeilLog2(PicSizeInCtbsY), slice.segment_address);
208     }
209 }
210 
PackSSHPartIndependent(BitstreamWriter & bs,NALU const & nalu,SPS const & sps,PPS const & pps,Slice const & slice)211 void HevcHeaderPacker::PackSSHPartIndependent(
212     BitstreamWriter &bs,
213     NALU const &     nalu,
214     SPS const &      sps,
215     PPS const &      pps,
216     Slice const &    slice)
217 {
218     const mfxU8 I   = 2;
219     mfxU32      nSE = 0;
220 
221     nSE += PutBits(bs, pps.num_extra_slice_header_bits, slice.reserved_flags);
222     nSE += PutUE(bs, slice.type);
223     nSE += pps.output_flag_present_flag && PutBit(bs, slice.pic_output_flag);
224     nSE += (sps.separate_colour_plane_flag == 1) && PutBits(bs, 2, slice.colour_plane_id);
225 
226     bool bNonIDR = nalu.nal_unit_type != IDR_W_RADL && nalu.nal_unit_type != IDR_N_LP;
227 
228     if (bNonIDR)
229         PackSSHPartNonIDR(bs, sps, slice);
230 
231     if (sps.sample_adaptive_offset_enabled_flag)
232     {
233         bs.AddInfo(PACK_SAOOffset, bs.GetOffset());
234 
235         nSE += PutBit(bs, slice.sao_luma_flag);
236         nSE += PutBit(bs, slice.sao_chroma_flag);
237     }
238 
239     if (slice.type != I)
240         PackSSHPartPB(bs, sps, pps, slice);
241 
242     bs.AddInfo(PACK_QPDOffset, bs.GetOffset());
243 
244     nSE += PutSE(bs, slice.slice_qp_delta);
245     nSE += pps.slice_chroma_qp_offsets_present_flag && PutSE(bs, slice.slice_cb_qp_offset);
246     nSE += pps.slice_chroma_qp_offsets_present_flag && PutSE(bs, slice.slice_cr_qp_offset);
247     nSE += pps.deblocking_filter_override_enabled_flag && PutBit(bs, slice.deblocking_filter_override_flag);
248     nSE += slice.deblocking_filter_override_flag && PutBit(bs, slice.deblocking_filter_disabled_flag);
249 
250     bool bPackDblkOffsets = slice.deblocking_filter_override_flag && !slice.deblocking_filter_disabled_flag;
251     nSE += bPackDblkOffsets && PutSE(bs, slice.beta_offset_div2);
252     nSE += bPackDblkOffsets && PutSE(bs, slice.tc_offset_div2);
253 
254     bool bPackSliceLF =
255         (pps.loop_filter_across_slices_enabled_flag && (slice.sao_luma_flag || slice.sao_chroma_flag || !slice.deblocking_filter_disabled_flag));
256 
257     nSE += bPackSliceLF && PutBit(bs, slice.loop_filter_across_slices_enabled_flag);
258 
259     assert(nSE >= 2);
260 }
261 
PackSSHPartNonIDR(BitstreamWriter & bs,SPS const & sps,Slice const & slice)262 void HevcHeaderPacker::PackSSHPartNonIDR(
263     BitstreamWriter &bs,
264     SPS const &      sps,
265     Slice const &    slice)
266 {
267     mfxU32 nSE        = 0;
268     bool   bNeedStIdx = slice.short_term_ref_pic_set_sps_flag && (sps.num_short_term_ref_pic_sets > 1);
269     auto   PutSpsLT   = [&](const Slice::LongTerm &lt) {
270         nSE += (sps.num_long_term_ref_pics_sps > 1) && PutBits(bs, CeilLog2(sps.num_long_term_ref_pics_sps), lt.lt_idx_sps);
271         nSE += PutBit(bs, lt.delta_poc_msb_present_flag);
272         nSE += lt.delta_poc_msb_present_flag && PutUE(bs, lt.delta_poc_msb_cycle_lt);
273     };
274     auto PutSliceLT = [&](const Slice::LongTerm &lt) {
275         nSE += PutBits(bs, sps.log2_max_pic_order_cnt_lsb_minus4 + 4, lt.poc_lsb_lt);
276         nSE += PutBit(bs, lt.used_by_curr_pic_lt_flag);
277         nSE += PutBit(bs, lt.delta_poc_msb_present_flag);
278         nSE += lt.delta_poc_msb_present_flag && PutUE(bs, lt.delta_poc_msb_cycle_lt);
279     };
280 
281     nSE += PutBits(bs, sps.log2_max_pic_order_cnt_lsb_minus4 + 4, slice.pic_order_cnt_lsb);
282     nSE += PutBit(bs, slice.short_term_ref_pic_set_sps_flag);
283 
284     if (!slice.short_term_ref_pic_set_sps_flag)
285     {
286         std::vector<STRPS> strps(sps.strps, sps.strps + sps.num_short_term_ref_pic_sets);
287         strps.push_back(slice.strps);
288         PackSTRPS(bs, strps.data(), sps.num_short_term_ref_pic_sets, sps.num_short_term_ref_pic_sets);
289     }
290 
291     nSE += bNeedStIdx && PutBits(bs, CeilLog2(sps.num_short_term_ref_pic_sets), slice.short_term_ref_pic_set_idx);
292 
293     if (sps.long_term_ref_pics_present_flag)
294     {
295         nSE += (sps.num_long_term_ref_pics_sps > 0) && PutUE(bs, slice.num_long_term_sps);
296         nSE += PutUE(bs, slice.num_long_term_pics);
297 
298         std::for_each(slice.lt, slice.lt + slice.num_long_term_sps, PutSpsLT);
299         std::for_each(slice.lt, slice.lt + slice.num_long_term_pics, PutSliceLT);
300     }
301 
302     nSE += sps.temporal_mvp_enabled_flag && PutBit(bs, slice.temporal_mvp_enabled_flag);
303 
304     assert(nSE >= 2);
305 }
306 
PackSTRPS(BitstreamWriter & bs,const STRPS * sets,mfxU32 num,mfxU32 idx)307 void HevcHeaderPacker::PackSTRPS(BitstreamWriter &bs, const STRPS *sets, mfxU32 num, mfxU32 idx)
308 {
309     //This function is not needed for I frame.
310     STRPS const &strps = sets[idx];
311 
312     if (idx != 0)
313         bs.PutBit(strps.inter_ref_pic_set_prediction_flag);
314 
315     if (strps.inter_ref_pic_set_prediction_flag)
316     {
317         if (idx == num)
318             bs.PutUE(strps.delta_idx_minus1);
319 
320         bs.PutBit(strps.delta_rps_sign);
321         bs.PutUE(strps.abs_delta_rps_minus1);
322 
323         mfxU32 RefRpsIdx    = idx - (strps.delta_idx_minus1 + 1);
324         mfxU32 NumDeltaPocs = sets[RefRpsIdx].num_negative_pics + sets[RefRpsIdx].num_positive_pics;
325 
326         std::for_each(
327             strps.pic, strps.pic + NumDeltaPocs + 1, [&](const STRPS::Pic &pic) {
328                 bs.PutBit(pic.used_by_curr_pic_flag);
329 
330                 if (!pic.used_by_curr_pic_flag)
331                     bs.PutBit(pic.use_delta_flag);
332             });
333     }
334     else
335     {
336         bs.PutUE(strps.num_negative_pics);
337         bs.PutUE(strps.num_positive_pics);
338 
339         std::for_each(
340             strps.pic, strps.pic + strps.num_positive_pics + strps.num_negative_pics, [&](const STRPS::Pic &pic) {
341                 bs.PutUE(pic.delta_poc_sx_minus1);
342                 bs.PutBit(pic.used_by_curr_pic_sx_flag);
343             });
344     }
345 };
346 
PackSSHPartPB(BitstreamWriter & bs,SPS const & sps,PPS const & pps,Slice const & slice)347 void HevcHeaderPacker::PackSSHPartPB(
348     BitstreamWriter &bs,
349     SPS const &      sps,
350     PPS const &      pps,
351     Slice const &    slice)
352 {
353     //This function is not needed for I frame.
354     auto   IsSTUsed        = [](const STRPSPic &pic) { return !!pic.used_by_curr_pic_sx_flag; };
355     auto   IsLTUsed        = [](const Slice::LongTerm &lt) { return !!lt.used_by_curr_pic_lt_flag; };
356     bool   bB              = (slice.type == 0);
357     mfxU32 nSE             = 0;
358     auto   stEnd           = slice.strps.pic + slice.strps.num_negative_pics + slice.strps.num_positive_pics;
359     auto   ltEnd           = slice.lt + slice.num_long_term_sps + slice.num_long_term_pics;
360     mfxU16 NumPocTotalCurr = mfxU16(std::count_if(slice.strps.pic, stEnd, IsSTUsed) + std::count_if(slice.lt, ltEnd, IsLTUsed));
361     bool   bNeedRefPicListM0      = pps.lists_modification_present_flag && NumPocTotalCurr > 1;
362     bool   bNeedRefPicListM1      = bNeedRefPicListM0 && bB;
363     bool   bNeedCRefIdx0   = (slice.collocated_from_l0_flag && slice.num_ref_idx_l0_active_minus1 > 0);
364     bool   bNeedCRefIdx1   = (!slice.collocated_from_l0_flag && slice.num_ref_idx_l1_active_minus1 > 0);
365     bool   bNeedCRefIdx    = slice.temporal_mvp_enabled_flag && (bNeedCRefIdx0 || bNeedCRefIdx1);
366     auto   PackRefPicListM        = [&](mfxU8 lx, mfxU8 num) {
367         nSE += PutBit(bs, !!slice.ref_pic_list_modification_flag_lx[lx]);
368 
369         num *= !!slice.ref_pic_list_modification_flag_lx[lx];
370 
371         std::for_each(slice.list_entry_lx[lx], slice.list_entry_lx[lx] + num, [&](mfxU8 entry) {
372             nSE += PutBits(bs, CeilLog2(NumPocTotalCurr), entry);
373         });
374     };
375 
376     nSE += PutBit(bs, slice.num_ref_idx_active_override_flag);
377     nSE += slice.num_ref_idx_active_override_flag && PutUE(bs, slice.num_ref_idx_l0_active_minus1);
378     nSE += slice.num_ref_idx_active_override_flag && bB && PutUE(bs, slice.num_ref_idx_l1_active_minus1);
379 
380     if (bNeedRefPicListM0)
381     {
382         PackRefPicListM(0, slice.num_ref_idx_l0_active_minus1 + 1);
383     }
384 
385     if (bNeedRefPicListM1)
386     {
387         PackRefPicListM(1, slice.num_ref_idx_l1_active_minus1 + 1);
388     }
389 
390     nSE += bB && PutBit(bs, slice.mvd_l1_zero_flag);
391     nSE += pps.cabac_init_present_flag && PutBit(bs, slice.cabac_init_flag);
392     nSE += slice.temporal_mvp_enabled_flag && bB && PutBit(bs, slice.collocated_from_l0_flag);
393     nSE += bNeedCRefIdx && PutUE(bs, slice.collocated_ref_idx);
394 
395     PackSSHPWT(bs, sps, pps, slice);
396 
397     nSE += PutUE(bs, slice.five_minus_max_num_merge_cand);
398 
399     assert(nSE >= 2);
400 }
401 
PackSSHPWT(BitstreamWriter & bs,const SPS & sps,const PPS & pps,const Slice & slice)402 bool HevcHeaderPacker::PackSSHPWT(
403     BitstreamWriter &bs, const SPS &sps, const PPS &pps, const Slice &slice)
404 {
405     //This function is not needed for I frame.
406     constexpr mfxU16 Y = 0, Cb = 1, Cr = 2, W = 0, O = 1, P = 1, B = 0;
407 
408     struct AccWFlag : std::pair<mfxU16, mfxI16>
409     {
410         AccWFlag(mfxU16 idx, mfxI16 w) : std::pair<mfxU16, mfxI16>(idx, w) {}
411         mfxU16 operator()(mfxU16 wf, const mfxI16 (&pwt)[3][2])
412         {
413             return (wf << 1) + !(pwt[first][O] == 0 && pwt[first][W] == second);
414         };
415     };
416 
417     bool   bNeedY = (pps.weighted_pred_flag && slice.type == P) || (pps.weighted_bipred_flag && slice.type == B);
418     bool   bNeedC = bNeedY && (sps.chroma_format_idc != 0);
419     mfxI16 BdOffsetC =
420         sps.high_precision_offsets_enabled_flag * (sps.bit_depth_chroma_minus8 + 8 - 1) + !sps.high_precision_offsets_enabled_flag * 7;
421     mfxU32 nSE         = 0;
422     mfxI16 WpOffC      = (1 << BdOffsetC);
423     mfxI16 wY          = (1 << slice.luma_log2_weight_denom);
424     mfxI16 wC          = (1 << slice.chroma_log2_weight_denom);
425     mfxI16 l2WDc       = slice.chroma_log2_weight_denom;
426     auto   startOffset = bs.GetOffset();
427 
428     auto PutPwtLX = [&](const mfxI16(&pwtLX)[16][3][2], mfxU32 sz) {
429         mfxU32 szY      = sz * bNeedY;
430         mfxU32 szC      = sz * bNeedC;
431         mfxU16 wfmap    = (1 << (szY - 1));
432         mfxU16 lumaw    = 0;
433         mfxU16 chromaw  = 0;
434         auto   PutWOVal = [&](const mfxI16(&pwt)[3][2]) {
435             bool bPutY = !!(lumaw & wfmap);
436             bool bPutC = !!(chromaw & wfmap);
437 
438             nSE += bPutY && PutSE(bs, pwt[Y][W] - wY);
439             nSE += bPutY && PutSE(bs, pwt[Y][O]);
440 
441             nSE += bPutC && PutSE(bs, pwt[Cb][W] - wC);
442             nSE += bPutC && PutSE(bs, clamp(((WpOffC * pwt[Cb][W]) >> l2WDc) + pwt[Cb][O] - WpOffC, -4 * WpOffC, 4 * WpOffC - 1));
443 
444             nSE += bPutC && PutSE(bs, pwt[Cb][W] - wC);
445             nSE += bPutC && PutSE(bs, clamp(((WpOffC * pwt[Cr][W]) >> l2WDc) + pwt[Cr][O] - WpOffC, -4 * WpOffC, 4 * WpOffC - 1));
446 
447             wfmap >>= 1;
448         };
449 
450         lumaw   = std::accumulate(pwtLX, pwtLX + szY, mfxU16(0), AccWFlag(Y, wY));
451         chromaw = std::accumulate(pwtLX, pwtLX + szC, mfxU16(0), AccWFlag(Cb, wC));
452         chromaw |= std::accumulate(pwtLX, pwtLX + szC, mfxU16(0), AccWFlag(Cr, wC));
453 
454         nSE += szY && PutBits(bs, szY, lumaw);
455         nSE += szC && PutBits(bs, szC, chromaw);
456 
457         std::for_each(pwtLX, pwtLX + szY, PutWOVal);
458     };
459 
460     bs.AddInfo(PACK_PWTOffset, startOffset);
461 
462     nSE += bNeedY && PutUE(bs, slice.luma_log2_weight_denom);
463     nSE += bNeedC && PutSE(bs, mfxI32(slice.chroma_log2_weight_denom) - slice.luma_log2_weight_denom);
464 
465     PutPwtLX(slice.pwt[0], slice.num_ref_idx_l0_active_minus1 + 1);
466     PutPwtLX(slice.pwt[1], (slice.num_ref_idx_l1_active_minus1 + 1) * (slice.type == B));
467 
468     bs.AddInfo(PACK_PWTLength, bs.GetOffset() - startOffset);
469 
470     return !!nSE;
471 }
472 
LoadSliceHeaderParams(CodecEncodeHevcSliceHeaderParams * pSH)473 MOS_STATUS HevcHeaderPacker::LoadSliceHeaderParams(CodecEncodeHevcSliceHeaderParams* pSH)
474 {
475     CODECHAL_ENCODE_CHK_NULL_RETURN(pSH);
476 
477     m_spsParams.log2_max_pic_order_cnt_lsb_minus4       = pSH->log2_max_pic_order_cnt_lsb_minus4;
478     m_sliceParams.pic_order_cnt_lsb                     &= ~(0xFFFFFFFF << (m_spsParams.log2_max_pic_order_cnt_lsb_minus4 + 4));
479     m_sliceParams.num_long_term_pics                    = pSH->num_long_term_pics;
480     for (int i = 0; i < m_sliceParams.num_long_term_pics; i++)
481     {
482         m_sliceParams.lt[i].used_by_curr_pic_lt_flag   = pSH->lt[i].used_by_curr_pic_lt_flag;
483         m_sliceParams.lt[i].delta_poc_msb_present_flag = pSH->lt[i].delta_poc_msb_present_flag;
484         m_sliceParams.lt[i].poc_lsb_lt                 = pSH->lt[i].poc_lsb_lt;
485         m_sliceParams.lt[i].delta_poc_msb_cycle_lt     = pSH->lt[i].delta_poc_msb_cycle_lt;
486     }
487     m_ppsParams.lists_modification_present_flag         = pSH->lists_modification_present_flag;
488     for (int i = 0; i < 2; i++)
489     {
490         m_sliceParams.ref_pic_list_modification_flag_lx[i] = pSH->ref_pic_list_modification_flag_lx[i];
491         for (int j = 0; j < 16; j++)
492         {
493             m_sliceParams.list_entry_lx[i][j] = pSH->list_entry_lx[i][j];
494         }
495     }
496     m_sliceParams.strps.num_negative_pics                   = pSH->num_negative_pics;
497     m_sliceParams.strps.num_positive_pics                   = pSH->num_positive_pics;
498 
499     if (pSH->num_negative_pics > 16 || pSH->num_positive_pics > 16 || pSH->num_negative_pics + pSH->num_positive_pics > 16)
500     {
501         return MOS_STATUS_INVALID_PARAMETER;
502     }
503 
504     for (int i = 0; i < pSH->num_negative_pics + pSH->num_positive_pics; i++)
505     {
506         if (i < pSH->num_negative_pics)
507         {
508             m_sliceParams.strps.pic[i].delta_poc_s0_minus1      = pSH->delta_poc_minus1[0][i];
509             m_sliceParams.strps.pic[i].used_by_curr_pic_s0_flag = pSH->used_by_curr_pic_flag[0][i];
510         }
511         else
512         {
513             m_sliceParams.strps.pic[i].delta_poc_s1_minus1      = pSH->delta_poc_minus1[1][i - pSH->num_negative_pics];
514             m_sliceParams.strps.pic[i].used_by_curr_pic_s1_flag = pSH->used_by_curr_pic_flag[1][i - pSH->num_negative_pics];
515         }
516     }
517 
518     return MOS_STATUS_SUCCESS;
519 }
520 
SliceHeaderPacker(EncoderParams * encodeParams)521 MOS_STATUS HevcHeaderPacker::SliceHeaderPacker(EncoderParams *encodeParams)
522 {
523     MOS_OS_FUNCTION_ENTER;
524     MOS_STATUS      eStatus;
525     BitstreamWriter rbsp(m_rbsp.data(), (mfxU32)m_rbsp.size());
526     mfxU8 *         pBegin      = rbsp.GetStart();
527     mfxU8 *         startplace  = pBegin;
528     mfxU8 *         pEnd        = pBegin + m_rbsp.size();
529     mfxU32          BitLen;
530     mfxU32          BitLenRecorded = 0;
531 
532     EncoderParams *pCodecHalEncodeParams = encodeParams;
533     CODECHAL_ENCODE_CHK_NULL_RETURN(pCodecHalEncodeParams);
534     BSBuffer *pBSBuffer = pCodecHalEncodeParams->pBSBuffer;
535     CODECHAL_ENCODE_CHK_NULL_RETURN(pBSBuffer);
536     PCODEC_ENCODER_SLCDATA pSlcData = (PCODEC_ENCODER_SLCDATA)pCodecHalEncodeParams->pSlcHeaderData;
537     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSPSParams(static_cast<PCODEC_HEVC_ENCODE_SEQUENCE_PARAMS>(encodeParams->pSeqParams)));
538     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetPPSParams(static_cast<PCODEC_HEVC_ENCODE_PICTURE_PARAMS>(encodeParams->pPicParams)));
539     CODECHAL_ENCODE_CHK_STATUS_RETURN(GetNaluParams(nalType, 0, 0, pBSBuffer->pCurrent == pBSBuffer->pBase));
540 
541     //uint8_t *pCurrent = pBSBuffer->pCurrent;
542     //uint32_t
543     for (uint32_t startLcu = 0, slcCount = 0; slcCount < encodeParams->dwNumSlices; slcCount++)
544     {
545         //startLcu += m_hevcSliceParams[slcCount].NumLCUsInSlice;
546         CODECHAL_ENCODE_CHK_STATUS_RETURN(GetSliceParams(static_cast<PCODEC_HEVC_ENCODE_SLICE_PARAMS>(encodeParams->pSliceParams)[slcCount]));
547         CODECHAL_ENCODE_CHK_STATUS_RETURN(LoadSliceHeaderParams((CodecEncodeHevcSliceHeaderParams*) pCodecHalEncodeParams->pSliceHeaderParams));
548 
549         rbsp.Reset(pBegin, mfxU32(pEnd - pBegin));
550         m_naluParams.long_start_code = 0/*pBSBuffer->pCurrent + (BitLenRecorded + 7) / 8 == pBSBuffer->pBase*/;
551         PackSSH(rbsp, m_naluParams, m_spsParams, m_ppsParams, m_sliceParams, m_bDssEnabled);
552         BitLen = rbsp.GetOffset();
553         pBegin += CeilDiv(BitLen, 8u);
554         pSlcData[slcCount].SliceOffset            = (uint32_t)(pBSBuffer->pCurrent + (BitLenRecorded + 7) / 8 - pBSBuffer->pBase);
555         pSlcData[slcCount].BitSize                = BitLen * 1 + (BitLen + 7) / 8 * !1;
556         pSlcData[slcCount].SkipEmulationByteCount = 3 /*+ (pBSBuffer->pCurrent + (BitLenRecorded + 7) / 8 == pBSBuffer->pBase)*/;
557         BitLenRecorded                            = BitLenRecorded + BitLen;
558     }
559 
560     MOS_SecureMemcpy(pBSBuffer->pCurrent,
561         (BitLenRecorded + 7) / 8,
562         startplace,
563         (BitLenRecorded + 7) / 8);
564 
565     return MOS_STATUS_SUCCESS;
566 }