1 // Copyright (c) 2018-2020 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 
21 #include "mfx_common.h"
22 #ifdef MFX_ENABLE_H264_VIDEO_ENCODE_HW
23 
24 #include <functional>
25 #include <algorithm>
26 #include <stdexcept>
27 #include <numeric>
28 #include <math.h>
29 #include <limits.h> /* for INT_MIN, INT_MAX, etc. on Linux/Android */
30 
31 
32 #include "cmrt_cross_platform.h"
33 
34 #include <assert.h>
35 #include "vm_time.h"
36 #include "mfx_brc_common.h"
37 #include "mfx_h264_encode_hw_utils.h"
38 #include "libmfx_core.h"
39 #include "umc_video_data.h"
40 #include "fast_copy.h"
41 
42 
43 using namespace MfxHwH264Encode;
44 
45 namespace MfxHwH264Encode
46 {
47     const mfxU32 NUM_CLOCK_TS[9] = { 1, 1, 1, 2, 2, 3, 3, 2, 3 };
48 
CalcNumFrameMin(const MfxHwH264Encode::MfxVideoParam & par,MFX_ENCODE_CAPS const & hwCaps)49     mfxU16 CalcNumFrameMin(const MfxHwH264Encode::MfxVideoParam &par, MFX_ENCODE_CAPS const & hwCaps)
50     {
51         mfxU16 numFrameMin = 0;
52 
53         if (IsMvcProfile(par.mfx.CodecProfile))//MVC
54         {
55             if (par.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
56             {
57                 numFrameMin = par.mfx.GopRefDist;
58             }
59             else // MFX_IOPATTERN_IN_VIDEO_MEMORY || MFX_IOPATTERN_IN_OPAQUE_MEMORY
60             {
61                 mfxExtCodingOptionDDI & extDdi = GetExtBufferRef(par);
62                 numFrameMin = IsOn(extDdi.RefRaw)
63                     ? par.mfx.GopRefDist + par.mfx.NumRefFrame
64                     : par.mfx.GopRefDist;
65             }
66 
67             numFrameMin = numFrameMin + par.AsyncDepth - 1;
68 
69             mfxExtMVCSeqDesc & extMvc = GetExtBufferRef(par);
70             numFrameMin = mfxU16(std::min(0xffffu, numFrameMin * extMvc.NumView));
71         }
72         if (IsAvcProfile(par.mfx.CodecProfile)) //AVC
73         {
74             mfxExtCodingOption2 *       extOpt2 = GetExtBuffer(par);
75             mfxExtCodingOption3 *       extOpt3 = GetExtBuffer(par);
76 
77             mfxU32  adaptGopDelay = 0;
78 #if defined(MFX_ENABLE_ENCTOOLS)
79             adaptGopDelay = H264EncTools::GetPreEncDelay(par);
80 #endif
81             mfxU16 mctfFrames = IsMctfSupported(par) ? (par.AsyncDepth > 1 ? 0 : 1) : 0;
82             if (par.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
83             {
84                 numFrameMin = (mfxU16)(par.mfx.GopRefDist + adaptGopDelay + mctfFrames + par.AsyncDepth - 1);
85             }
86             else // MFX_IOPATTERN_IN_VIDEO_MEMORY || MFX_IOPATTERN_IN_OPAQUE_MEMORY
87             {
88                 numFrameMin = (mfxU16)AsyncRoutineEmulator(par, adaptGopDelay).GetTotalGreediness() + par.AsyncDepth - 1;
89 
90                 mfxExtCodingOptionDDI & extDdi = GetExtBufferRef(par);
91                 numFrameMin += IsOn(extDdi.RefRaw)
92                     ? par.mfx.NumRefFrame
93                     : 0;
94 
95                 // strange thing but for backward compatibility:
96                 //   msdk needs to tell how many surfaces application will need for reordering
97                 //   even if application does this reordering(!!!)
98                 if (par.mfx.EncodedOrder)
99                     numFrameMin += par.mfx.GopRefDist - 1;
100 
101                 if (extOpt2 && extOpt2->MaxSliceSize != 0 && !IsDriverSliceSizeControlEnabled(par, hwCaps))
102                     numFrameMin++;
103                 if (extOpt3 && IsOn(extOpt3->FadeDetection))
104                     numFrameMin++;
105             }
106         }
107 
108         return numFrameMin;
109     }
110 
Less(mfxU32 num1,mfxU32 num2)111     inline bool Less(mfxU32 num1, mfxU32 num2)
112     {
113         return (num1 - num2) >= 0x80000000;
114     }
115 
SetSurfaceFree(Surface & surf)116     void SetSurfaceFree(Surface & surf)
117     {
118         surf.SetFree(true);
119     }
120 
SetReconstructFree(Reconstruct & rec)121     void SetReconstructFree(Reconstruct & rec)
122     {
123         rec.SetFree(true);
124         rec.m_reference[TFIELD] = false;
125         rec.m_reference[BFIELD] = false;
126     }
127 
128     struct SetReconstructFreeAndDecRef
129     {
SetReconstructFreeAndDecRefMfxHwH264Encode::SetReconstructFreeAndDecRef130         explicit SetReconstructFreeAndDecRef(VideoCORE& core) : m_core(core) {}
131 
operator ()MfxHwH264Encode::SetReconstructFreeAndDecRef132         void operator() (Reconstruct& rec)
133         {
134             SetReconstructFree(rec);
135             if (rec.m_yuv && rec.m_yuv->Data.Locked > 0)
136                 m_core.DecreaseReference(&rec.m_yuv->Data);
137             rec.m_yuv = 0;
138         }
139 
140     private:
141         void operator=(const SetReconstructFreeAndDecRef&);
142         VideoCORE& m_core;
143     };
144 
GetPicStruct(MfxVideoParam const & video,DdiTask const & task)145     PairU16 GetPicStruct(
146         MfxVideoParam const & video,
147         DdiTask const &       task)
148     {
149         mfxU16 runtPs = task.m_yuv->Info.PicStruct;
150 
151         //if (mfxExtVppAuxData const * extAuxData = GetExtBuffer(task.m_ctrl))
152         //    if (extAuxData->PicStruct != MFX_PICSTRUCT_UNKNOWN)
153         //        runtPs = extAuxData->PicStruct;
154 
155         return GetPicStruct(video, runtPs);
156     }
157 
GetPicStruct(MfxVideoParam const & video,mfxU16 runtPs)158     PairU16 GetPicStruct(
159         MfxVideoParam const & video,
160         mfxU16                runtPs)
161     {
162         mfxExtCodingOption const & extOpt = GetExtBufferRef(video);
163         mfxU16 initPs   = video.mfx.FrameInfo.PicStruct;
164         mfxU16 framePic = extOpt.FramePicture;
165         mfxU16 fieldOut = extOpt.FieldOutput;
166 
167         static mfxU16 const PRG  = MFX_PICSTRUCT_PROGRESSIVE;
168         static mfxU16 const TFF  = MFX_PICSTRUCT_FIELD_TFF;
169         static mfxU16 const BFF  = MFX_PICSTRUCT_FIELD_BFF;
170         static mfxU16 const UNK  = MFX_PICSTRUCT_UNKNOWN;
171         static mfxU16 const DBL  = MFX_PICSTRUCT_FRAME_DOUBLING;
172         static mfxU16 const TRPL = MFX_PICSTRUCT_FRAME_TRIPLING;
173         static mfxU16 const REP  = MFX_PICSTRUCT_FIELD_REPEATED;
174 
175         PairU16 ps = MakePair(PRG, PRG);
176 
177         if (initPs == PRG) { assert(IsOff(fieldOut)); }
178         if (initPs == UNK && runtPs == UNK) { assert(!"unsupported picstruct combination"); }
179 
180         if (initPs == PRG && runtPs == UNK)
181             ps = MakePair(PRG, PRG);
182         else if (initPs == PRG && runtPs == PRG)
183             ps = MakePair(PRG, PRG);
184         else if (initPs == PRG && runtPs == (PRG | DBL))
185             ps = MakePair<mfxU16>(PRG, PRG | DBL);
186         else if (initPs == PRG && runtPs == (PRG | TRPL))
187             ps = MakePair<mfxU16>(PRG, PRG | TRPL);
188         else if (initPs == BFF && runtPs == UNK)
189             ps = MakePair(BFF, BFF);
190         else if ((initPs == BFF || initPs == UNK) && runtPs == BFF)
191             ps = MakePair(BFF, BFF);
192         else if (initPs == TFF && runtPs == UNK)
193             ps = MakePair(TFF, TFF);
194         else if ((initPs == TFF || initPs == UNK) && runtPs == TFF)
195             ps = MakePair(TFF, TFF);
196         else if (initPs == UNK && runtPs == (PRG | BFF))
197             ps = MakePair<mfxU16>(PRG, PRG | BFF);
198         else if (initPs == UNK && runtPs == (PRG | TFF))
199             ps = MakePair<mfxU16>(PRG, PRG | TFF);
200         else if (initPs == UNK && runtPs == (PRG | BFF | REP))
201             ps = MakePair<mfxU16>(PRG, PRG | BFF | REP);
202         else if (initPs == UNK && runtPs == (PRG | TFF | REP))
203             ps = MakePair<mfxU16>(PRG, PRG | TFF | REP);
204         else if ((initPs == TFF || initPs == UNK) && runtPs == PRG)
205             ps = MakePair<mfxU16>(PRG, PRG | TFF);
206         else if (initPs == BFF && runtPs == PRG)
207             ps = MakePair<mfxU16>(PRG, PRG | BFF);
208         else if (initPs == PRG)
209             ps = MakePair(PRG, PRG);
210         else if (initPs == BFF)
211             ps = MakePair(BFF, BFF);
212         else if (initPs == TFF)
213             ps = MakePair(TFF, TFF);
214         else if (initPs == UNK && framePic == MFX_CODINGOPTION_OFF)
215             ps = MakePair(TFF, TFF);
216         else if (initPs == UNK && framePic != MFX_CODINGOPTION_OFF)
217             ps = MakePair(PRG, PRG);
218 
219         if (IsOn(fieldOut) && ps[ENC] == PRG)
220             ps[ENC] = ps[DISP] = (ps[1] & BFF) ? BFF : TFF;
221 
222         return ps;
223     }
224 
isBitstreamUpdateRequired(MfxVideoParam const & video,MFX_ENCODE_CAPS caps,eMFXHWType)225     bool isBitstreamUpdateRequired(MfxVideoParam const & video,
226         MFX_ENCODE_CAPS caps,
227         eMFXHWType )
228     {
229         if(video.Protected)
230         {
231             return false;
232         }
233 
234         mfxExtCodingOption2 & extOpt2 = GetExtBufferRef(video);
235         if(video.mfx.LowPower == MFX_CODINGOPTION_ON)
236             return video.calcParam.numTemporalLayer > 0;
237         else if(extOpt2.MaxSliceSize)
238             return true;
239         else if(caps.ddi_caps.HeaderInsertion == 1)
240             return true;
241         return false;
242     }
243 
ExtendFrameType(mfxU32 type)244     PairU8 ExtendFrameType(mfxU32 type)
245     {
246         mfxU32 type1 = type & 0xff;
247         mfxU32 type2 = type >> 8;
248 
249         if (type2 == 0)
250         {
251             type2 = type1 & ~MFX_FRAMETYPE_IDR; // second field can't be IDR
252 
253             if (type1 & MFX_FRAMETYPE_I)
254             {
255                 type2 &= ~MFX_FRAMETYPE_I;
256                 type2 |=  MFX_FRAMETYPE_P;
257             }
258         }
259 
260         return PairU8(type1, type2);
261     }
262 
CheckSubMbPartition(mfxExtCodingOptionDDI const * extDdi,mfxU8 frameType)263     bool CheckSubMbPartition(mfxExtCodingOptionDDI const * extDdi, mfxU8 frameType)
264     {
265         if (frameType & MFX_FRAMETYPE_P)
266             return IsOff(extDdi->DisablePSubMBPartition);
267         if (frameType & MFX_FRAMETYPE_B)
268             return IsOff(extDdi->DisablePSubMBPartition);
269         return true;
270     }
271 
GetPFrameLevel(mfxU32 i,mfxU32 num)272     mfxU8 GetPFrameLevel(mfxU32 i, mfxU32 num)
273     {
274         if (i == 0 || i >= num) return 0;
275         mfxU32 level = 1;
276         mfxU32 begin = 0;
277         mfxU32 end = num;
278         mfxU32 t = (begin + end + 1) / 2;
279 
280         while (t != i)
281         {
282             level++;
283             if (i > t)
284                 begin = t;
285             else
286                 end = t;
287             t = (begin + end + 1) / 2;
288         }
289         return (mfxU8)level;
290     }
291 
PLayer(MfxVideoParam const & par,mfxU32 order)292     mfxU8 PLayer(MfxVideoParam const & par, mfxU32 order)
293     {
294         return std::min<mfxU8>(7, GetPFrameLevel(order % par.calcParam.PPyrInterval, par.calcParam.PPyrInterval));
295     }
296 
GetQpValue(DdiTask const & task,MfxVideoParam const & par,mfxU32 frameType)297     mfxU8 GetQpValue(
298         DdiTask const &       task,
299         MfxVideoParam const & par,
300         mfxU32                frameType)
301     {
302         const mfxExtCodingOption2& CO2 = GetExtBufferRef(par);
303         const mfxExtCodingOption3& CO3 = GetExtBufferRef(par);
304         const mfxU8 minQP = 1;
305         const mfxU8 maxQP = 51;
306         mfxU8 QP = 0;
307 
308         if (par.mfx.RateControlMethod == MFX_RATECONTROL_CQP /*||
309             par.mfx.RateControlMethod == MFX_RATECONTROL_VCM && (frameType & MFX_FRAMETYPE_I)*/)
310         {
311             if (task.m_ctrl.QP > 0)
312             {
313                 if (IsOn(par.mfx.LowPower) && task.m_ctrl.QP < 10)
314                     return 10;
315                 // get per frame qp
316                 return std::min<mfxU8>(task.m_ctrl.QP, 51);
317             }
318             else
319             {
320                 bool bUseQPOffset =
321                     (frameType & MFX_FRAMETYPE_B && CO2.BRefType == MFX_B_REF_PYRAMID)
322                     || (frameType & MFX_FRAMETYPE_P && CO3.PRefType == MFX_P_REF_PYRAMID);
323 
324                 // get per stream qp
325                 switch (frameType & MFX_FRAMETYPE_IPB)
326                 {
327                 case MFX_FRAMETYPE_I:
328                     return mfxU8(par.mfx.QPI + task.m_QPdelta);
329                 case MFX_FRAMETYPE_P:
330                     QP = mfxU8(par.mfx.QPP);
331                     if (task.m_bQPDelta)
332                     {
333                         QP = (mfxU8)mfx::clamp<mfxI32>(task.m_QPdelta + QP, minQP, maxQP);
334                     }
335                     else if (par.calcParam.numTemporalLayer > 1)
336                     {
337                         QP = (mfxU8)mfx::clamp<mfxI32>(CO3.QPOffset[task.m_tid] + QP, minQP, maxQP);
338                     }
339                     return QP;
340                 case MFX_FRAMETYPE_B:
341                     QP = mfxU8(par.mfx.QPB);
342                     if (task.m_bQPDelta)
343                     {
344                         QP = (mfxU8)mfx::clamp<mfxI32>(task.m_QPdelta + QP, minQP, maxQP);
345                     }
346                     else if (bUseQPOffset && (task.m_currGopRefDist == 0 || task.m_currGopRefDist > 2))
347                     {
348                         QP = (mfxU8)mfx::clamp<mfxI32>(
349                             CO3.QPOffset[mfx::clamp<mfxI32>(task.m_loc.level - 1, 0, 7)] + QP, minQP, maxQP);
350                     }
351                     return QP;
352                 default: assert(!"bad frame type (GetQpValue)"); return 0xff;
353                 }
354 
355             }
356         }
357 
358         return 26;
359     }
360 
IsPreferred(mfxExtAVCRefListCtrl const & refPicListCtrl,mfxU32 frameOrder,mfxU32 picStruct)361     bool IsPreferred(mfxExtAVCRefListCtrl const & refPicListCtrl, mfxU32 frameOrder, mfxU32 picStruct)
362     {
363         for (mfxU8 j = 0; j < 16; j ++)
364             if (refPicListCtrl.PreferredRefList[j].FrameOrder == frameOrder &&
365                 refPicListCtrl.PreferredRefList[j].PicStruct == picStruct)
366                 return true;
367 
368         return false;
369     }
370 
IsRejected(mfxExtAVCRefListCtrl const & refPicListCtrl,mfxU32 frameOrder,mfxU32 picStruct)371     bool IsRejected(mfxExtAVCRefListCtrl const & refPicListCtrl, mfxU32 frameOrder, mfxU32 picStruct)
372     {
373         for (mfxU8 j = 0; j < 16; j ++)
374             if (refPicListCtrl.RejectedRefList[j].FrameOrder == frameOrder &&
375                 refPicListCtrl.PreferredRefList[j].PicStruct == picStruct)
376                 return true;
377 
378         return false;
379     }
380 
381 
GetPicNum(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,mfxU8 ref)382     mfxI32 GetPicNum(
383         std::vector<Reconstruct> const & recons,
384         ArrayDpbFrame const &            dpb,
385         mfxU8                            ref)
386     {
387         Reconstruct const & recFrame = recons[dpb[ref & 127].m_frameIdx];
388         return recFrame.m_picNum[ref >> 7];
389     }
390 
GetPicNumF(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,mfxU8 ref)391     mfxI32 GetPicNumF(
392         std::vector<Reconstruct> const & recons,
393         ArrayDpbFrame const &            dpb,
394         mfxU8                            ref)
395     {
396         Reconstruct const & recFrame = recons[dpb[ref & 127].m_frameIdx];
397         return recFrame.m_reference[ref >> 7]
398             ? recFrame.m_picNum[ref >> 7]
399             : 0x20000;
400     }
401 
GetLongTermPicNum(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,mfxU8 ref)402     mfxU8 GetLongTermPicNum(
403         std::vector<Reconstruct> const & recons,
404         ArrayDpbFrame const &            dpb,
405         mfxU8                            ref)
406     {
407         Reconstruct const & recFrame = recons[dpb[ref & 127].m_frameIdx];
408         return recFrame.m_longTermPicNum[ref >> 7];
409     }
410 
GetLongTermPicNumF(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,mfxU8 ref)411     mfxU32 GetLongTermPicNumF(
412         std::vector<Reconstruct> const & recons,
413         ArrayDpbFrame const &            dpb,
414         mfxU8                            ref)
415     {
416         DpbFrame const    & dpbFrame = dpb[ref & 127];
417         Reconstruct const & recFrame = recons[dpbFrame.m_frameIdx];
418 
419         return recFrame.m_reference[ref >> 7] && dpbFrame.m_longterm
420             ? recFrame.m_longTermPicNum[ref >> 7]
421             : 0x20;
422     }
423 
424     struct BasePredicateForRefPic
425     {
426         typedef std::vector<Reconstruct> Recons;
427         typedef ArrayDpbFrame            Dpb;
428         typedef mfxU8                    Arg;
429         typedef bool                     Res;
430 
BasePredicateForRefPicMfxHwH264Encode::BasePredicateForRefPic431         BasePredicateForRefPic(Recons const & recons, Dpb const & dpb)
432         : m_recons(recons)
433         , m_dpb(dpb)
434         {
435         }
436 
437         Recons const & m_recons;
438         Dpb const &    m_dpb;
439     };
440 
441     struct RefPicNumIsGreater : public BasePredicateForRefPic
442     {
RefPicNumIsGreaterMfxHwH264Encode::RefPicNumIsGreater443         RefPicNumIsGreater(Recons const & recons, Dpb const & dpb)
444         : BasePredicateForRefPic(recons, dpb)
445         {
446         }
447 
operator ()MfxHwH264Encode::RefPicNumIsGreater448         bool operator ()(mfxU8 l, mfxU8 r) const
449         {
450             return Less(
451                 GetPicNum(m_recons, m_dpb, r),
452                 GetPicNum(m_recons, m_dpb, l));
453         }
454     };
455 
456     struct LongTermRefPicNumIsLess : public BasePredicateForRefPic
457     {
LongTermRefPicNumIsLessMfxHwH264Encode::LongTermRefPicNumIsLess458         LongTermRefPicNumIsLess(Recons const & recons, Dpb const & dpb)
459         : BasePredicateForRefPic(recons, dpb)
460         {
461         }
462 
operator ()MfxHwH264Encode::LongTermRefPicNumIsLess463         bool operator ()(mfxU8 l, mfxU8 r) const
464         {
465             return Less(
466                 GetLongTermPicNum(m_recons, m_dpb, l),
467                 GetLongTermPicNum(m_recons, m_dpb, r));
468         }
469     };
470 
471     struct RefPocIsLess : public BasePredicateForRefPic
472     {
RefPocIsLessMfxHwH264Encode::RefPocIsLess473         RefPocIsLess(Recons const & recons, Dpb const & dpb)
474         : BasePredicateForRefPic(recons, dpb)
475         {
476         }
477 
operator ()MfxHwH264Encode::RefPocIsLess478         bool operator ()(mfxU8 l, mfxU8 r) const
479         {
480             return Less(GetPoc(m_dpb, l), GetPoc(m_dpb, r));
481         }
482     };
483 
484     struct RefPocIsGreater : public BasePredicateForRefPic
485     {
RefPocIsGreaterMfxHwH264Encode::RefPocIsGreater486         RefPocIsGreater(Recons const & recons, Dpb const & dpb)
487         : BasePredicateForRefPic(recons, dpb)
488         {
489         }
490 
operator ()MfxHwH264Encode::RefPocIsGreater491         bool operator ()(mfxU8 l, mfxU8 r) const
492         {
493             return Less(GetPoc(m_dpb, r), GetPoc(m_dpb, l));
494         }
495     };
496 
497     struct RefPocIsLessThan : public BasePredicateForRefPic
498     {
RefPocIsLessThanMfxHwH264Encode::RefPocIsLessThan499         RefPocIsLessThan(Recons const & recons, Dpb const & dpb, mfxU32 poc)
500         : BasePredicateForRefPic(recons, dpb)
501         , m_poc(poc)
502         {
503         }
504 
operator ()MfxHwH264Encode::RefPocIsLessThan505         bool operator ()(mfxU8 r) const
506         {
507             return Less(GetPoc(m_dpb, r), m_poc);
508         }
509 
510         mfxU32 m_poc;
511     };
512 
513     struct RefPocIsGreaterThan : public BasePredicateForRefPic
514     {
RefPocIsGreaterThanMfxHwH264Encode::RefPocIsGreaterThan515         RefPocIsGreaterThan(Recons const & recons, Dpb const & dpb, mfxU32 poc)
516         : BasePredicateForRefPic(recons, dpb)
517         , m_poc(poc)
518         {
519         }
520 
operator ()MfxHwH264Encode::RefPocIsGreaterThan521         bool operator ()(mfxU8 r) const
522         {
523             return Less(m_poc, GetPoc(m_dpb, r));
524         }
525 
526         mfxU32 m_poc;
527     };
528 
529     struct RefIsShortTerm : public BasePredicateForRefPic
530     {
RefIsShortTermMfxHwH264Encode::RefIsShortTerm531         RefIsShortTerm(Recons const & recons, Dpb const & dpb)
532         : BasePredicateForRefPic(recons, dpb)
533         {
534         }
535 
operator ()MfxHwH264Encode::RefIsShortTerm536         bool operator ()(mfxU8 r) const
537         {
538             return m_recons[m_dpb[r & 127].m_frameIdx].m_reference[r >> 7] && !m_dpb[r & 127].m_longterm;
539         }
540     };
541 
542     struct RefIsLongTerm : public BasePredicateForRefPic
543     {
RefIsLongTermMfxHwH264Encode::RefIsLongTerm544         RefIsLongTerm(Recons const & recons, Dpb const & dpb)
545         : BasePredicateForRefPic(recons, dpb)
546         {
547         }
548 
operator ()MfxHwH264Encode::RefIsLongTerm549         bool operator ()(mfxU8 r) const
550         {
551             return m_recons[m_dpb[r & 127].m_frameIdx].m_reference[r >> 7] && m_dpb[r & 127].m_longterm;
552         }
553     };
554 
555     struct RefIsFromHigherTemporalLayer : public BasePredicateForRefPic
556     {
RefIsFromHigherTemporalLayerMfxHwH264Encode::RefIsFromHigherTemporalLayer557         RefIsFromHigherTemporalLayer(Recons const & recons, Dpb const & dpb, mfxU32 currTid)
558         : BasePredicateForRefPic(recons, dpb)
559         , m_currTid(currTid)
560         {
561         }
562 
operator ()MfxHwH264Encode::RefIsFromHigherTemporalLayer563         bool operator ()(mfxU8 r) const
564         {
565             return m_currTid < m_recons[m_dpb[r & 127].m_frameIdx].m_tid;
566         }
567 
568         mfxU32 m_currTid;
569     };
570 
571     template <class T, class U>
572     struct LogicalAndHelper
573     {
574         typedef typename T::Arg Arg;
575         typedef typename T::Res Res;
576 
LogicalAndHelperMfxHwH264Encode::LogicalAndHelper577         LogicalAndHelper(T pr1, U pr2)
578         : m_pr1(pr1)
579         , m_pr2(pr2)
580         {
581         }
582 
operator ()MfxHwH264Encode::LogicalAndHelper583         Res operator ()(Arg arg) const
584         {
585             return m_pr1(arg) && m_pr2(arg);
586         }
587 
588         T m_pr1;
589         U m_pr2;
590     };
591 
592     template <class T, class U>
LogicalAnd(T pr1,U pr2)593     LogicalAndHelper<T, U> LogicalAnd(T pr1, U pr2)
594     {
595         return LogicalAndHelper<T, U>(pr1, pr2);
596     }
597 
598     template <class T>
599     struct LogicalNotHelper
600     {
601         typedef typename T::argument_type Arg;
602         typedef typename T::result_type   Res;
603 
LogicalNotHelperMfxHwH264Encode::LogicalNotHelper604         LogicalNotHelper(T pr)
605         : m_pr(pr)
606         {
607         }
608 
operator ()MfxHwH264Encode::LogicalNotHelper609         Res operator ()(Arg arg) const
610         {
611             return !m_pred(arg);
612         }
613 
614         T m_pr;
615     };
616 
617     template <class T>
LogicalNot(T pr)618     LogicalNotHelper<T> LogicalNot(T pr)
619     {
620         return LogicalNotHelper<T>(pr);
621     }
622 
623 
LongTermInList(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,ArrayU8x33 const & list)624     bool LongTermInList(
625         std::vector<Reconstruct> const & recons,
626         ArrayDpbFrame const &            dpb,
627         ArrayU8x33 const &               list)
628     {
629         return list.End() == std::find_if(list.Begin(), list.End(), RefIsLongTerm(recons, dpb));
630     }
631 
632 
CalcTemporalLayerIndex(MfxVideoParam const & video,mfxI32 frameOrder)633     mfxU8 CalcTemporalLayerIndex(MfxVideoParam const & video, mfxI32 frameOrder)
634     {
635         mfxU8 i = 0;
636 
637         if (video.calcParam.numTemporalLayer > 0)
638         {
639             mfxU32 maxScale = video.calcParam.scale[video.calcParam.numTemporalLayer - 1];
640             for (; i < video.calcParam.numTemporalLayer; i++)
641                 if (frameOrder % (maxScale / video.calcParam.scale[i]) == 0)
642                     break;
643         }
644 
645         return i;
646     }
647 };
648 
649 /////////////////////////////////////////////////////////////////////////////////
650 // FrameTypeGenerator
651 
FrameTypeGenerator()652 FrameTypeGenerator::FrameTypeGenerator()
653     : m_frameOrder (0)    // in display order
654     , m_gopOptFlag (0)
655     , m_gopPicSize (0)
656     , m_gopRefDist (0)
657     , m_refBaseDist(0)   // key picture distance
658     , m_biPyramid  (0)
659     , m_idrDist    (0)
660 {
661 }
662 
Init(MfxVideoParam const & video)663 void FrameTypeGenerator::Init(MfxVideoParam const & video)
664 {
665     m_gopOptFlag = video.mfx.GopOptFlag;
666     m_gopPicSize = std::max<mfxU16>(video.mfx.GopPicSize, 1);
667     m_gopRefDist = std::max<mfxU16>(video.mfx.GopRefDist, 1);
668     m_idrDist    = m_gopPicSize * (video.mfx.IdrInterval + 1);
669 
670     mfxExtCodingOption2 & extOpt2 = GetExtBufferRef(video);
671     m_biPyramid = extOpt2.BRefType == MFX_B_REF_OFF ? 0 : extOpt2.BRefType;
672 
673     m_frameOrder = 0;
674 }
675 
676 namespace
677 {
GetEncodingOrder(mfxU32 displayOrder,mfxU32 begin,mfxU32 end,mfxU32 counter,bool & ref)678     mfxU32 GetEncodingOrder(mfxU32 displayOrder, mfxU32 begin, mfxU32 end, mfxU32 counter, bool & ref)
679     {
680         assert(displayOrder >= begin);
681         assert(displayOrder <  end);
682 
683         ref = (end - begin > 1);
684 
685         mfxU32 pivot = (begin + end) / 2;
686         if (displayOrder == pivot)
687             return counter;
688         else if (displayOrder < pivot)
689             return GetEncodingOrder(displayOrder, begin, pivot, counter + 1, ref);
690         else
691             return GetEncodingOrder(displayOrder, pivot + 1, end, counter + 1 + pivot - begin, ref);
692     }
693 }
694 
GetBiFrameLocation() const695 BiFrameLocation FrameTypeGenerator::GetBiFrameLocation() const
696 {
697     BiFrameLocation loc;
698 
699     if (m_biPyramid != 0)
700     {
701         bool ref = false;
702         mfxU32 orderInMiniGop = m_frameOrder % m_gopPicSize % m_gopRefDist - 1;
703 
704         loc.encodingOrder = GetEncodingOrder(orderInMiniGop, 0, m_gopRefDist - 1, 0, ref);
705         loc.miniGopCount  = m_frameOrder % m_gopPicSize / m_gopRefDist;
706         loc.refFrameFlag  = mfxU16(ref ? MFX_FRAMETYPE_REF : 0);
707     }
708 
709     return loc;
710 }
711 
Get() const712 PairU8 FrameTypeGenerator::Get() const
713 {
714     mfxU16 keyPicture = (m_refBaseDist && m_frameOrder % m_refBaseDist == 0) ? MFX_FRAMETYPE_KEYPIC : 0;
715 
716     if (m_frameOrder == 0)
717     {
718         return ExtendFrameType(MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | MFX_FRAMETYPE_IDR | keyPicture);
719     }
720 
721     if (m_frameOrder % m_gopPicSize == 0)
722     {
723         return ExtendFrameType(MFX_FRAMETYPE_I | MFX_FRAMETYPE_REF | keyPicture);
724     }
725 
726     if (m_frameOrder % m_gopPicSize % m_gopRefDist == 0)
727     {
728         return ExtendFrameType(MFX_FRAMETYPE_P | MFX_FRAMETYPE_REF | keyPicture);
729     }
730 
731     if ((m_gopOptFlag & MFX_GOP_STRICT) == 0)
732     {
733         if (((m_frameOrder + 1) % m_gopPicSize == 0 && (m_gopOptFlag & MFX_GOP_CLOSED)) ||
734             ((m_frameOrder + 1) % m_idrDist == 0))
735         {
736             // switch last B frame to P frame
737             return ExtendFrameType(MFX_FRAMETYPE_P | MFX_FRAMETYPE_REF | keyPicture);
738         }
739     }
740 
741     return ExtendFrameType(MFX_FRAMETYPE_B | keyPicture);
742 }
743 
Next()744 void FrameTypeGenerator::Next()
745 {
746     m_frameOrder = (m_frameOrder + 1) % m_idrDist;
747 }
748 
749 
UpdateRefFrames(ArrayDpbFrame const & dpb,DdiTask const & task,mfxU32 field)750 void TaskManager::UpdateRefFrames(
751     ArrayDpbFrame const & dpb,
752     DdiTask const &       task,
753     mfxU32                field)
754 {
755     mfxU32 ps = task.GetPicStructForEncode();
756 
757     for (mfxU32 i = 0; i < dpb.Size(); i++)
758     {
759         Reconstruct & ref = m_recons[dpb[i].m_frameIdx];
760 
761         if (dpb[i].m_longterm)
762         {
763             // update longTermPicNum
764             if (ps == MFX_PICSTRUCT_PROGRESSIVE)
765             {
766                 ref.m_longTermPicNum.top = ref.m_longTermFrameIdx;
767                 ref.m_longTermPicNum.bot = ref.m_longTermFrameIdx;
768             }
769             else
770             {
771                 ref.m_longTermPicNum.top = 2 * ref.m_longTermFrameIdx + mfxU8( !field);
772                 ref.m_longTermPicNum.bot = 2 * ref.m_longTermFrameIdx + mfxU8(!!field);
773             }
774         }
775         else
776         {
777             // update frameNumWrap
778             if (ref.m_frameNum > task.m_frameNum)
779             {
780                 ref.m_frameNumWrap = ref.m_frameNum - m_frameNumMax;
781             }
782             else
783             {
784                 ref.m_frameNumWrap = mfxI32(ref.m_frameNum);
785             }
786 
787             // update picNum
788             if (task.GetPicStructForEncode() & MFX_PICSTRUCT_PROGRESSIVE)
789             {
790                 ref.m_picNum.top = ref.m_frameNumWrap;
791                 ref.m_picNum.bot = ref.m_frameNumWrap;
792             }
793             else
794             {
795                 ref.m_picNum.top = 2 * ref.m_frameNumWrap + ( !field);
796                 ref.m_picNum.bot = 2 * ref.m_frameNumWrap + (!!field);
797             }
798         }
799     }
800 }
801 
CountL1Refs(Reconstruct const & bframe) const802 mfxU32 TaskManager::CountL1Refs(Reconstruct const & bframe) const
803 {
804     mfxU32 l1RefNum = 0;
805     for (mfxU32 i = 0; i < m_dpb.Size(); i++)
806     {
807         if (Less(bframe.m_frameOrder, m_recons[m_dpb[i].m_frameIdx].m_frameOrder))
808         {
809             l1RefNum++;
810         }
811     }
812 
813     return l1RefNum;
814 }
815 
816 
// Builds the initial reference picture lists of one field of 'task'.
//
// task  - task being prepared; its m_dpb[field] is read (and the picNum data
//         of the referenced reconstructs refreshed via UpdateRefFrames), and
//         its m_list0/m_list1/m_initSizeList0/m_initSizeList1 are written.
// field - index of the field to build the lists for.
//
// Lists hold indices into the dpb array. Frame-level lists are built first
// (list0Frm/list1Frm) and then either copied as is (progressive) or expanded
// into field lists by ProcessFields (interlaced).
//
// NOTE: list0, list1, dpb and useRefBasePicFlag are bound using the value of
// 'field' passed by the caller. For a progressive picture 'field' is then
// overwritten with TFIELD, so every later use of 'field' (task.m_type[field],
// UpdateRefFrames, m_initSizeList0/1[field]) sees TFIELD. Keep this order.
void TaskManager::BuildRefPicLists(
    DdiTask & task,
    mfxU32    field)
{
    // NOTE(review): 0xff is presumably the fill value of unused entries - confirm against ArrayU8x33
    ArrayU8x33 list0Frm(0xff); // list0 built like current picture is frame
    ArrayU8x33 list1Frm(0xff); // list1 built like current picture is frame

    ArrayU8x33    & list0 = task.m_list0[field];
    ArrayU8x33    & list1 = task.m_list1[field];
    ArrayDpbFrame & dpb   = task.m_dpb[field];

    // key pictures reference only dpb entries stored as base pictures (m_refBase)
    mfxU32 useRefBasePicFlag = !!(task.m_type[field] & MFX_FRAMETYPE_KEYPIC);
    if (task.GetPicStructForEncode() == MFX_PICSTRUCT_PROGRESSIVE)
        field = TFIELD;

    // update frameNumWrap and picNum of reference frames in dpb
    UpdateRefFrames(dpb, task, field);

    // build lists of reference frame
    if (task.m_type[field] & MFX_FRAMETYPE_IDR)
    {
        // in MVC P or B frame can be IDR
        // its DPB may be not empty
        // however it shouldn't have inter-frame references
    }
    else if (task.m_type[field] & MFX_FRAMETYPE_P)
    {
        // 8.2.4.2.1-2 "Initialisation process for
        // the reference picture list for P and SP slices in frames/fields"

        // short-term references first, ordered by RefPicNumIsGreater
        for (mfxU32 i = 0; i < dpb.Size(); i++)
            if (!dpb[i].m_longterm && (useRefBasePicFlag == dpb[i].m_refBase))
                list0Frm.PushBack(mfxU8(i));

        std::sort(
            list0Frm.Begin(),
            list0Frm.End(),
            RefPicNumIsGreater(m_recons, dpb));

        // remember where the long-term part starts; the pointer is taken
        // before the long-term entries are pushed and is used as the
        // beginning of the second sort range
        mfxU8 * firstLongTerm = list0Frm.End();

        // long-term references are appended after the short-term ones
        for (mfxU32 i = 0; i < dpb.Size(); i++)
            if (dpb[i].m_longterm && (useRefBasePicFlag == dpb[i].m_refBase))
                list0Frm.PushBack(mfxU8(i));

        std::sort(
            firstLongTerm,
            list0Frm.End(),
            LongTermRefPicNumIsLess(m_recons, dpb));
    }
    else if (task.m_type[field] & MFX_FRAMETYPE_B)
    {
        // 8.2.4.2.3-4 "Initialisation process for
        // reference picture lists for B slices in frames/fields"

        // split short-term references by POC relative to the current picture:
        // entries for which Less(poc, current poc) holds go to list0, the rest to list1
        for (mfxU32 i = 0; i < dpb.Size(); i++)
        {
            if (!dpb[i].m_longterm && (useRefBasePicFlag == dpb[i].m_refBase))
            {
                if (Less(dpb[i].m_poc[0], task.GetPoc(0)))
                    list0Frm.PushBack(mfxU8(i));
                else
                    list1Frm.PushBack(mfxU8(i));
            }
        }

        std::sort(
            list0Frm.Begin(),
            list0Frm.End(),
            RefPocIsGreater(m_recons, dpb));

        std::sort(
            list1Frm.Begin(),
            list1Frm.End(),
            RefPocIsLess(m_recons, dpb));

        // elements of list1 append list0
        // elements of list0 append list1
        // sizes are captured first so that only the original entries are cross-appended
        mfxU32 list0Size = list0Frm.Size();
        mfxU32 list1Size = list1Frm.Size();

        for (mfxU32 ref = 0; ref < list1Size; ref++)
            list0Frm.PushBack(list1Frm[ref]);

        // the first list0Size entries of list0Frm are still the original list0
        for (mfxU32 ref = 0; ref < list0Size; ref++)
            list1Frm.PushBack(list0Frm[ref]);

        // start of the long-term part of each list (taken before pushing)
        mfxU8 * firstLongTermL0 = list0Frm.End();
        mfxU8 * firstLongTermL1 = list1Frm.End();

        // long-term references are appended to both lists
        for (mfxU32 i = 0; i < dpb.Size(); i++)
        {
            if (dpb[i].m_longterm && (useRefBasePicFlag == dpb[i].m_refBase))
            {
                list0Frm.PushBack(mfxU8(i));
                list1Frm.PushBack(mfxU8(i));
            }
        }

        std::sort(
            firstLongTermL0,
            list0Frm.End(),
            LongTermRefPicNumIsLess(m_recons, dpb));

        std::sort(
            firstLongTermL1,
            list1Frm.End(),
            LongTermRefPicNumIsLess(m_recons, dpb));
    }

    if (task.GetPicStructForEncode() & MFX_PICSTRUCT_PROGRESSIVE)
    {
        // just copy lists
        list0 = list0Frm;
        list1 = list1Frm;
    }
    else
    {
        // for interlaced picture we need to perform
        // 8.2.4.2.5 "Initialisation process for reference picture lists in fields"

        list0.Resize(0);
        list1.Resize(0);

        ProcessFields(field, dpb, list0Frm, list0);
        ProcessFields(field, dpb, list1Frm, list1);
    }

    // "When the reference picture list RefPicList1 has more than one entry
    // and RefPicList1 is identical to the reference picture list RefPicList0,
    // the first two entries RefPicList1[0] and RefPicList1[1] are switched"
    if (list1.Size() > 1 && list0 == list1)
    {
        std::swap(list1[0], list1[1]);
    }

    // remember the sizes of the initial lists (indexed by the possibly overwritten 'field')
    task.m_initSizeList0[field] = list0.Size();
    task.m_initSizeList1[field] = list1.Size();
}
954 
ProcessFields(mfxU32 field,ArrayDpbFrame const & dpb,ArrayU8x33 const & picListFrm,ArrayU8x33 & picListFld) const955 void TaskManager::ProcessFields(
956     mfxU32                field,
957     ArrayDpbFrame const & dpb,
958     ArrayU8x33 const &    picListFrm,
959     ArrayU8x33 &          picListFld) const
960 {
961     // 8.2.4.2.5 "Initialisation process for reference picture lists in fields"
962     mfxU32 idxSameParity = 0; // index in frameList
963     mfxU32 idxOppositeParity = 0; // index in frameList
964     mfxU32 sameParity = field;
965     mfxU32 oppositeParity = !field;
966 
967     picListFld.Resize(0);
968 
969     while (idxSameParity < picListFrm.Size() || idxOppositeParity < picListFrm.Size())
970     {
971         for (; idxSameParity < picListFrm.Size(); idxSameParity++)
972         {
973             if (m_recons[dpb[picListFrm[idxSameParity]].m_frameIdx].m_reference[sameParity])
974             {
975                 picListFld.PushBack(picListFrm[idxSameParity]);
976                 if (field == BFIELD)
977                     picListFld.Back() |= 0x80;
978 
979                 idxSameParity++;
980                 break;
981             }
982         }
983         for (; idxOppositeParity < picListFrm.Size(); idxOppositeParity++)
984         {
985             if (m_recons[dpb[picListFrm[idxOppositeParity]].m_frameIdx].m_reference[oppositeParity])
986             {
987                 picListFld.PushBack(picListFrm[idxOppositeParity]);
988                 if (field == TFIELD)
989                     picListFld.Back() |= 0x80;
990 
991                 idxOppositeParity++;
992                 break;
993             }
994         }
995     }
996 }
997 
IsSubmitted(DdiTask const & task) const998 bool TaskManager::IsSubmitted(DdiTask const & task) const
999 {
1000     return task.m_idxBs.top != NO_INDEX && !m_bitstreams[task.m_idxBs.top % m_bitstreams.size()].IsFree();
1001 }
1002 
TaskManager()1003 TaskManager::TaskManager()
1004 : m_core(NULL)
1005 , m_stat()
1006 , m_frameNum(0)
1007 , m_frameNumMax(0)
1008 , m_frameOrder(0)
1009 , m_frameOrderIdr(0)
1010 , m_frameOrderI(0)
1011 , m_idrPicId(0)
1012 , m_viewIdx(0)
1013 , m_cpbRemoval(0)
1014 , m_cpbRemovalBufferingPeriod(0)
1015 , m_numReorderFrames(0)
1016 , m_pushed(0)
1017 {
1018 }
1019 
~TaskManager()1020 TaskManager::~TaskManager()
1021 {
1022     Close();
1023 }
1024 
Init(VideoCORE * core,MfxVideoParam const & video,mfxU32 viewIdx)1025 void TaskManager::Init(
1026     VideoCORE *           core,
1027     MfxVideoParam const & video,
1028     mfxU32                viewIdx)
1029 {
1030     m_core    = core;
1031     m_viewIdx = viewIdx;
1032 
1033     m_frameNum = 0;
1034     m_frameNumMax = 256;
1035     m_frameOrder = 0;
1036     m_frameOrderI = 0;
1037     m_frameOrderIdr = 0;
1038     m_idrPicId = 0;
1039 
1040     m_video = video;
1041 
1042     if (m_video.IOPattern == MFX_IOPATTERN_IN_OPAQUE_MEMORY)
1043     {
1044         mfxExtOpaqueSurfaceAlloc * extOpaq = GetExtBuffer(m_video);
1045         m_video.IOPattern = (extOpaq->In.Type & MFX_MEMTYPE_SYSTEM_MEMORY)
1046             ? mfxU16(MFX_IOPATTERN_IN_SYSTEM_MEMORY)
1047             : mfxU16(MFX_IOPATTERN_IN_VIDEO_MEMORY);
1048     }
1049 
1050     m_numReorderFrames = GetNumReorderFrames(video);
1051 
1052     m_dpb.Resize(0);
1053 
1054     m_frameTypeGen.Init(m_video);
1055     m_bitstreams.resize(CalcNumSurfBitstream(m_video));
1056     m_recons.resize(CalcNumSurfRecon(m_video));
1057     m_tasks.resize(CalcNumTasks(m_video));
1058 
1059     // need raw surfaces only when input surfaces are in system memory
1060     if (m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
1061         m_raws.resize(CalcNumSurfRaw(m_video));
1062 
1063     Zero(m_stat);
1064 }
1065 
Reset(MfxVideoParam const & video)1066 void TaskManager::Reset(MfxVideoParam const & video)
1067 {
1068     mfxExtSpsHeader const * extSpsNew = GetExtBuffer(video);
1069     mfxExtSpsHeader const * extSpsOld = GetExtBuffer(m_video);
1070 
1071     if (!Equal(*extSpsNew, *extSpsOld))
1072     {
1073         m_dpb.Resize(0);
1074         m_idrPicId = 0;
1075         m_frameTypeGen.Init(video);
1076         Zero(m_stat);
1077 
1078         std::for_each(m_raws.begin(), m_raws.end(), SetSurfaceFree);
1079         std::for_each(m_bitstreams.begin(), m_bitstreams.end(), SetSurfaceFree);
1080         std::for_each(m_tasks.begin(), m_tasks.end(), SetReconstructFreeAndDecRef(*m_core));
1081         std::for_each(m_recons.begin(), m_recons.end(), SetReconstructFreeAndDecRef(*m_core));
1082     }
1083 
1084     m_video = video;
1085     m_numReorderFrames = GetNumReorderFrames(m_video);
1086 }
1087 
Close()1088 void TaskManager::Close()
1089 {
1090     UMC::AutomaticUMCMutex guard(m_mutex);
1091     Clear(m_tasks);
1092     Clear(m_raws);
1093     Clear(m_bitstreams);
1094     Clear(m_recons);
1095 }
1096 
FindFreeSurface(std::vector<T> const & vec)1097 template<typename T> mfxU32 FindFreeSurface(std::vector<T> const & vec)
1098 {
1099     for (size_t j = 0; j < vec.size(); j++)
1100     {
1101         if (vec[j].IsFree())
1102         {
1103             return (mfxU32)j;
1104         }
1105     }
1106 
1107     return NO_INDEX;
1108 }
1109 
SwitchLastB2P()1110 void TaskManager::SwitchLastB2P()
1111 {
1112     DdiTask* latestFrame = 0;
1113     for (size_t i = 0; i < m_tasks.size(); i++)
1114     {
1115         DdiTask& task = m_tasks[i];
1116 
1117         if (task.IsFree() || task.m_bs != 0)
1118         {
1119             continue;
1120         }
1121 
1122         if (latestFrame == 0 || Less(latestFrame->m_frameOrder, task.m_frameOrder))
1123         {
1124             latestFrame = &task;
1125         }
1126     }
1127 
1128     if (latestFrame)
1129     {
1130         mfxU8 frameType = latestFrame->GetFrameType();
1131 
1132         if ((frameType & MFX_FRAMETYPE_B) &&
1133             (CountL1Refs(*latestFrame) == 0))
1134         {
1135             latestFrame->m_type.top = latestFrame->m_type.bot = MFX_FRAMETYPE_P | MFX_FRAMETYPE_REF;
1136         }
1137     }
1138 }
1139 
SelectNextBFrameFromTail()1140 DdiTask * TaskManager::SelectNextBFrameFromTail()
1141 {
1142     DdiTask * toEncode = 0;
1143 
1144     for (size_t i = 0; i < m_tasks.size(); i++)
1145     {
1146         DdiTask & task = m_tasks[i];
1147 
1148         if (task.IsFree() || IsSubmitted(task))
1149         {
1150             continue;
1151         }
1152 
1153         if ((task.GetFrameType() & MFX_FRAMETYPE_B) == 0)
1154             return 0;
1155 
1156         if (toEncode == 0 || Less(task.m_frameOrder, toEncode->m_frameOrder))
1157         {
1158             toEncode = &task;
1159         }
1160     }
1161 
1162     return toEncode;
1163 }
1164 
FindFrameToEncode()1165 DdiTask * TaskManager::FindFrameToEncode()
1166 {
1167     DdiTask * toEncode = 0;
1168 
1169     for (size_t i = 0; i < m_tasks.size(); i++)
1170     {
1171         DdiTask & task = m_tasks[i];
1172 
1173         if (task.IsFree() || IsSubmitted(task))
1174         {
1175             continue;
1176         }
1177 
1178         if (toEncode == 0 || Less(task.m_frameOrder, toEncode->m_frameOrder))
1179         {
1180             if ((task.GetFrameType() & MFX_FRAMETYPE_B) == 0 ||
1181                 (CountL1Refs(task) > 0))
1182             {
1183                 toEncode = &task;
1184             }
1185         }
1186     }
1187 
1188     if (toEncode)
1189     {
1190         mfxU32 idrPicFlag   = !!(toEncode->GetFrameType() & MFX_FRAMETYPE_IDR);
1191         mfxU32 intraPicFlag = !!(toEncode->GetFrameType() & MFX_FRAMETYPE_I);
1192         mfxU32 closedGop    = !!(m_video.mfx.GopOptFlag & MFX_GOP_CLOSED);
1193         mfxU32 strictGop    = !!(m_video.mfx.GopOptFlag & MFX_GOP_STRICT);
1194 
1195         if (!strictGop && (idrPicFlag || (intraPicFlag && closedGop)))
1196         {
1197             // find latest B frame prior to current I frame
1198             // since gop is closed such B frames will be encoded with only L1 ref
1199             // since gop is not strict it is possible (and better)
1200             // to switch latest B frame to P and encode other B frame before new gop begins
1201             DdiTask * latestBframe = 0;
1202             for (size_t i = 0; i < m_tasks.size(); i++)
1203             {
1204                 if (m_tasks[i].IsFree() || IsSubmitted(m_tasks[i]))
1205                     continue;
1206 
1207                 if ((m_tasks[i].GetFrameType() & MFX_FRAMETYPE_B) &&
1208                     Less(m_tasks[i].m_frameOrder, toEncode->m_frameOrder) &&
1209                     (latestBframe == 0 || Less(latestBframe->m_frameOrder, m_tasks[i].m_frameOrder)))
1210                 {
1211                     latestBframe = &m_tasks[i];
1212                 }
1213             }
1214 
1215             if (latestBframe)
1216             {
1217                 latestBframe->m_type[0] = latestBframe->m_type[1] = MFX_FRAMETYPE_PREF;
1218                 toEncode = latestBframe;
1219             }
1220         }
1221     }
1222 
1223     if (toEncode && toEncode->GetFrameType() & MFX_FRAMETYPE_B)
1224     {
1225         for (size_t i = 0; i < m_tasks.size(); i++)
1226         {
1227             if (m_tasks[i].IsFree() || IsSubmitted(m_tasks[i]))
1228                 continue;
1229 
1230             if ((m_tasks[i].GetFrameType() & MFX_FRAMETYPE_B) &&
1231                 m_tasks[i].m_loc.miniGopCount == toEncode->m_loc.miniGopCount &&
1232                 m_tasks[i].m_loc.encodingOrder < toEncode->m_loc.encodingOrder)
1233             {
1234                 toEncode = &m_tasks[i];
1235             }
1236         }
1237     }
1238 
1239     return toEncode;
1240 }
1241 
1242 namespace HwUtils
1243 {
1244     struct FindInDpbByExtFrameTag
1245     {
FindInDpbByExtFrameTagHwUtils::FindInDpbByExtFrameTag1246         FindInDpbByExtFrameTag(
1247             std::vector<Reconstruct> const & recons,
1248             mfxU32                           extFrameTag)
1249         : m_recons(recons)
1250         , m_extFrameTag(extFrameTag)
1251         {
1252         }
1253 
operator ()HwUtils::FindInDpbByExtFrameTag1254         bool operator ()(DpbFrame const & dpbFrame) const
1255         {
1256             return m_recons[dpbFrame.m_frameIdx].m_extFrameTag == m_extFrameTag;
1257         }
1258 
1259         std::vector<Reconstruct> const & m_recons;
1260         mfxU32                           m_extFrameTag;
1261     };
1262 
1263     struct FindInDpbByFrameOrder
1264     {
FindInDpbByFrameOrderHwUtils::FindInDpbByFrameOrder1265         FindInDpbByFrameOrder(
1266             std::vector<Reconstruct> const & recons,
1267             mfxU32                           frameOrder)
1268             : m_recons(recons)
1269             , m_frameOrder(frameOrder)
1270         {
1271         }
1272 
operator ()HwUtils::FindInDpbByFrameOrder1273         bool operator ()(DpbFrame const & dpbFrame) const
1274         {
1275             return m_recons[dpbFrame.m_frameIdx].m_frameOrder == m_frameOrder;
1276         }
1277 
1278         std::vector<Reconstruct> const & m_recons;
1279         mfxU32                           m_frameOrder;
1280     };
1281 
1282     struct OrderByFrameNumWrap
1283     {
OrderByFrameNumWrapHwUtils::OrderByFrameNumWrap1284         OrderByFrameNumWrap(
1285             std::vector<Reconstruct> const & recons)
1286         : m_recons(recons)
1287         {
1288         }
1289 
operator ()HwUtils::OrderByFrameNumWrap1290         bool operator ()(DpbFrame const & lhs, DpbFrame const & rhs) const
1291         {
1292             if (!lhs.m_longterm && !rhs.m_longterm)
1293                 if (lhs.m_refBase == rhs.m_refBase)
1294                     return m_recons[lhs.m_frameIdx].m_frameNumWrap < m_recons[rhs.m_frameIdx].m_frameNumWrap;
1295                 else
1296                     return lhs.m_refBase > rhs.m_refBase;
1297             else if (!lhs.m_longterm && rhs.m_longterm)
1298                 return true;
1299             else if (lhs.m_longterm && !rhs.m_longterm)
1300                 return false;
1301             else // both long term
1302                 return m_recons[lhs.m_frameIdx].m_longTermPicNum[0] < m_recons[rhs.m_frameIdx].m_longTermPicNum[0];
1303         }
1304 
1305         std::vector<Reconstruct> const & m_recons;
1306     };
1307 
1308     struct OrderByFrameNumWrapKeyRef
1309     {
OrderByFrameNumWrapKeyRefHwUtils::OrderByFrameNumWrapKeyRef1310         OrderByFrameNumWrapKeyRef(
1311             std::vector<Reconstruct> const & recons)
1312             : m_recons(recons)
1313         {
1314         }
1315 
operator ()HwUtils::OrderByFrameNumWrapKeyRef1316         bool operator ()(DpbFrame const & lhs, DpbFrame const & rhs) const
1317         {
1318             if (!lhs.m_longterm && !rhs.m_longterm)
1319             {
1320                 if (lhs.m_refBase == rhs.m_refBase)
1321                 {
1322                     if (lhs.m_keyRef != rhs.m_keyRef)
1323                         return (lhs.m_keyRef < rhs.m_keyRef);
1324                     else
1325                         return (m_recons[lhs.m_frameIdx].m_frameNumWrap < m_recons[rhs.m_frameIdx].m_frameNumWrap);
1326                 }
1327                 else
1328                     return lhs.m_refBase > rhs.m_refBase;
1329             }
1330             else if (!lhs.m_longterm && rhs.m_longterm)
1331                 return true;
1332             else if (lhs.m_longterm && !rhs.m_longterm)
1333                 return false;
1334             else // both long term
1335                 return m_recons[lhs.m_frameIdx].m_longTermPicNum[0] < m_recons[rhs.m_frameIdx].m_longTermPicNum[0];
1336         }
1337 
1338         std::vector<Reconstruct> const & m_recons;
1339     };
1340     struct OrderByDisplayOrder
1341     {
OrderByDisplayOrderHwUtils::OrderByDisplayOrder1342         OrderByDisplayOrder(
1343             std::vector<Reconstruct> const & recons)
1344         : m_recons(recons)
1345         {
1346         }
1347 
operator ()HwUtils::OrderByDisplayOrder1348         bool operator ()(DpbFrame const & lhs, DpbFrame const & rhs) const
1349         {
1350             return m_recons[lhs.m_frameIdx].m_frameOrder < m_recons[rhs.m_frameIdx].m_frameOrder;
1351         }
1352 
1353         std::vector<Reconstruct> const & m_recons;
1354     };
1355 
1356     struct OrderByDisplayOrderKeyRef
1357     {
OrderByDisplayOrderKeyRefHwUtils::OrderByDisplayOrderKeyRef1358         OrderByDisplayOrderKeyRef(
1359             std::vector<Reconstruct> const & recons)
1360             : m_recons(recons)
1361         {
1362         }
1363 
operator ()HwUtils::OrderByDisplayOrderKeyRef1364         bool operator ()(DpbFrame const & lhs, DpbFrame const & rhs) const
1365         {
1366             if (lhs.m_keyRef != rhs.m_keyRef)
1367                 return (lhs.m_keyRef < rhs.m_keyRef);
1368             return m_recons[lhs.m_frameIdx].m_frameOrder < m_recons[rhs.m_frameIdx].m_frameOrder;
1369         }
1370 
1371         std::vector<Reconstruct> const & m_recons;
1372     };
1373 };
1374 
UpdateMaxLongTermFrameIdxPlus1(ArrayU8x8 & arr,mfxU8 curTidx,mfxU8 val)1375 void UpdateMaxLongTermFrameIdxPlus1(ArrayU8x8 & arr, mfxU8 curTidx, mfxU8 val)
1376 {
1377     std::fill(arr.Begin() + curTidx, arr.End(), val);
1378 }
1379 
// Computes the state of the dpb after encoding of one field of 'task' and
// records the memory management control operations (MMCO) that lead to it.
//
// task            - task being encoded; task.m_dpb[fieldId] is the dpb before
//                   encoding (left unchanged), task.m_decRefPicMrk[fieldId]
//                   receives the marking commands, task.m_longTermFrameIdx is
//                   set when the current frame becomes long-term
// fieldId         - index of the field being processed
// dpbPostEncoding - output; starts as a copy of the initial dpb and is then
//                   modified according to the marking commands
//
// Side effect: m_longTermFrameIdx of a reconstruct in m_recons is updated when
// a short-term reference is converted to long-term.
void TaskManager::UpdateDpb(
    DdiTask       & task,
    mfxU32          fieldId,
    ArrayDpbFrame & dpbPostEncoding)
{
    // declare shorter names
    ArrayDpbFrame const &  initDpb  = task.m_dpb[fieldId];
    ArrayDpbFrame &        currDpb  = dpbPostEncoding;
    ArrayU8x8 &            maxLtIdx = currDpb.m_maxLongTermFrameIdxPlus1;
    mfxU32                 type     = task.m_type[fieldId];
    DecRefPicMarkingInfo & marking  = task.m_decRefPicMrk[fieldId];

    // marking commands will be applied to dpbPostEncoding
    // initial dpb stay unchanged
    currDpb = initDpb;

    if ((type & MFX_FRAMETYPE_REF) == 0)
        return; // non-reference frames doesn't change dpb

    // choose the reference list control to obey: the internal one is used when
    // it is present and either has priority or no external control is attached;
    // otherwise the external one is used (it may be null)
    mfxExtAVCRefListCtrl const * ext_ctrl = GetExtBuffer(task.m_ctrl);
    mfxExtAVCRefListCtrl const * ctrl = (task.m_internalListCtrlPresent && (task.m_internalListCtrlHasPriority || !ext_ctrl))
        ? &task.m_internalListCtrl
        : ext_ctrl;

    // FrameOrder values of the internal control are matched against m_frameOrder,
    // those of the external control against m_extFrameTag
    bool useInternalFrameOrder = false;
    if (ctrl && ctrl == &task.m_internalListCtrl) useInternalFrameOrder = true;

    if (type & MFX_FRAMETYPE_IDR)
    {
        // IDR: the dpb is emptied and the current frame becomes its only
        // content (twice if a reference base picture is stored as well)
        bool currFrameIsLongTerm = false;

        currDpb.Resize(0);
        UpdateMaxLongTermFrameIdxPlus1(maxLtIdx, 0, 0);

        marking.long_term_reference_flag = 0;

        if (ctrl)
        {
            // the current frame is marked long-term if it is listed in LongTermRefList
            for (mfxU32 i = 0; i < 16 && ctrl->LongTermRefList[i].FrameOrder != 0xffffffff; i++)
            {
                if (ctrl->LongTermRefList[i].FrameOrder == (useInternalFrameOrder ? task.m_frameOrder : task.m_extFrameTag))
                {
                    marking.long_term_reference_flag = 1;
                    currFrameIsLongTerm = true;
                    task.m_longTermFrameIdx = 0;
                    break;
                }
            }
        }

        DpbFrame newDpbFrame;
        newDpbFrame.m_frameIdx = mfxU8(task.m_idxRecon);
        newDpbFrame.m_poc      = task.GetPoc();
        newDpbFrame.m_viewIdx  = mfxU16(task.m_viewIdx);
        newDpbFrame.m_longterm = currFrameIsLongTerm;
        newDpbFrame.m_keyRef = task.m_keyReference;
        currDpb.PushBack(newDpbFrame);
        if (task.m_storeRefBasePicFlag)
        {
            // same frame is stored once more as a reference base picture
            newDpbFrame.m_refBase = 1;
            currDpb.PushBack(newDpbFrame);
        }
        // MaxLongTermFrameIdxPlus1 is 1 if the IDR is long-term and 0 otherwise
        UpdateMaxLongTermFrameIdxPlus1(maxLtIdx, 0, marking.long_term_reference_flag);
    }
    else
    {
        mfxU32 ffid = task.GetFirstField();

        // second field of a frame whose first field was a reference:
        // the frame is already in the dpb
        bool currFrameIsAddedToDpb = (fieldId != ffid) && (task.m_type[ffid] & MFX_FRAMETYPE_REF);

        // collect used long-term frame indices
        ArrayU8x16 usedLtIdx;
        usedLtIdx.Resize(16, 0);
        for (mfxU32 i = 0; i < initDpb.Size(); i++)
            if (initDpb[i].m_longterm)
                usedLtIdx[m_recons[initDpb[i].m_frameIdx].m_longTermFrameIdx] = 1;

        // check longterm list
        // when frameOrder is sent first time corresponding 'short-term' reference is marked 'long-term'
        // when frameOrder is sent second time corresponding 'long-term' reference is marked 'unused'
        if (ctrl)
        {
            // adaptive marking is supported only for progressive encoding
            assert(task.GetPicStructForEncode() == MFX_PICSTRUCT_PROGRESSIVE);

            // pass 1: remove every rejected frame that is present in the dpb
            for (mfxU32 i = 0; i < 16 && ctrl->RejectedRefList[i].FrameOrder != static_cast<mfxU32>(MFX_FRAMEORDER_UNKNOWN); i++)
            {
                DpbFrame * ref = currDpb.End();

                if (!useInternalFrameOrder)
                {
                    ref = std::find_if(
                        currDpb.Begin(),
                        currDpb.End(),
                        HwUtils::FindInDpbByExtFrameTag(m_recons, ctrl->RejectedRefList[i].FrameOrder));
                }
                else
                {
                    ref = std::find_if(
                        currDpb.Begin(),
                        currDpb.End(),
                        HwUtils::FindInDpbByFrameOrder(m_recons, ctrl->RejectedRefList[i].FrameOrder));
                }

                if (ref != currDpb.End())
                {
                    if (ref->m_longterm)
                    {
                        // long-term reference: release it and its long-term index
                        marking.PushBack(MMCO_LT_TO_UNUSED, m_recons[ref->m_frameIdx].m_longTermPicNum[0]);
                        usedLtIdx[m_recons[ref->m_frameIdx].m_longTermFrameIdx] = 0;
                    }
                    else
                    {
                        // short-term reference: addressed by picNum difference minus 1
                        Reconstruct const & recon = m_recons[ref->m_frameIdx];
                        marking.PushBack(MMCO_ST_TO_UNUSED, task.m_picNum[fieldId] - recon.m_picNum[0] - 1);
                    }

                    currDpb.Erase(ref);
                }
            }

            // pass 2: convert listed frames to long-term references
            for (mfxU32 i = 0; i < 16 && ctrl->LongTermRefList[i].FrameOrder != static_cast<mfxU32>(MFX_FRAMEORDER_UNKNOWN); i++)
            {
                DpbFrame * dpbFrame = currDpb.End();
                if (!useInternalFrameOrder)
                {
                    dpbFrame = std::find_if(
                        currDpb.Begin(),
                        currDpb.End(),
                        HwUtils::FindInDpbByExtFrameTag(m_recons, ctrl->LongTermRefList[i].FrameOrder));
                }
                else
                {
                    dpbFrame = std::find_if(
                        currDpb.Begin(),
                        currDpb.End(),
                        HwUtils::FindInDpbByFrameOrder(m_recons, ctrl->LongTermRefList[i].FrameOrder));
                }

                if (dpbFrame != currDpb.End() && dpbFrame->m_longterm == 0)
                {
                    // frame is a short-term reference in the dpb: make it long-term
                    Reconstruct & ref = m_recons[dpbFrame->m_frameIdx];

                    // find free long-term frame index
                    mfxU8 longTermIdx = mfxU8(std::find(usedLtIdx.Begin(), usedLtIdx.End(), 0) - usedLtIdx.Begin());
                    assert(longTermIdx != usedLtIdx.Size());
                    if (longTermIdx == usedLtIdx.Size())
                        break; // no free index, stop processing the list

                    if (longTermIdx >= maxLtIdx[task.m_tidx])
                    {
                        // need to update MaxLongTermFrameIdx
                        assert(longTermIdx < m_video.mfx.NumRefFrame);
                        marking.PushBack(MMCO_SET_MAX_LT_IDX, longTermIdx + 1);
                        UpdateMaxLongTermFrameIdxPlus1(maxLtIdx, task.m_tidx, longTermIdx + 1);
                    }

                    marking.PushBack(MMCO_ST_TO_LT, task.m_picNum[fieldId] - ref.m_picNum[0] - 1, longTermIdx);
                    usedLtIdx[longTermIdx] = 1;
                    ref.m_longTermFrameIdx = longTermIdx;

                    dpbFrame->m_longterm = 1;
                }
                else if (ctrl->LongTermRefList[i].FrameOrder == (useInternalFrameOrder ? task.m_frameOrder : task.m_extFrameTag))
                {
                    // frame is not in dpb, but it is a current frame
                    // mark it as 'long-term'

                    // first make free space in dpb if it is full
                    if (currDpb.Size() == m_video.mfx.NumRefFrame)
                    {
                        // short-term entries order before long-term ones, so the
                        // minimum is a short-term reference whenever one exists
                        DpbFrame * toRemove = std::min_element(currDpb.Begin(), currDpb.End(), HwUtils::OrderByFrameNumWrap(m_recons));

                        assert(toRemove != currDpb.End());
                        if (toRemove == currDpb.End())
                            break;

                        if (toRemove->m_longterm == 1)
                        {
                            // no short-term reference in dpb
                            // remove oldest long-term
                            toRemove = std::min_element(
                                currDpb.Begin(),
                                currDpb.End(),
                                HwUtils::OrderByDisplayOrder(m_recons));
                            assert(toRemove->m_longterm == 1); // must be longterm ref

                            Reconstruct const & ref = m_recons[toRemove->m_frameIdx];
                            marking.PushBack(MMCO_LT_TO_UNUSED, ref.m_longTermPicNum[0]);
                            usedLtIdx[ref.m_longTermFrameIdx] = 0;
                        }
                        else
                        {
                            Reconstruct const & ref = m_recons[toRemove->m_frameIdx];
                            marking.PushBack(MMCO_ST_TO_UNUSED, task.m_picNum[fieldId] - ref.m_picNum[0] - 1);
                        }

                        currDpb.Erase(toRemove);
                    }

                    // find free long-term frame index
                    mfxU8 longTermIdx = mfxU8(std::find(usedLtIdx.Begin(), usedLtIdx.End(), 0) - usedLtIdx.Begin());
                    assert(longTermIdx != usedLtIdx.Size());
                    if (longTermIdx == usedLtIdx.Size())
                        break; // no free index, stop processing the list

                    if (longTermIdx >= maxLtIdx[task.m_tidx])
                    {
                        // need to update MaxLongTermFrameIdx
                        assert(longTermIdx < m_video.mfx.NumRefFrame);
                        marking.PushBack(MMCO_SET_MAX_LT_IDX, longTermIdx + 1);
                        UpdateMaxLongTermFrameIdxPlus1(maxLtIdx, task.m_tidx, longTermIdx + 1);
                    }

                    marking.PushBack(MMCO_CURR_TO_LT, longTermIdx);
                    usedLtIdx[longTermIdx] = 1;
                    task.m_longTermFrameIdx = longTermIdx;

                    DpbFrame newDpbFrame;
                    newDpbFrame.m_frameIdx = mfxU8(task.m_idxRecon);
                    newDpbFrame.m_poc      = task.GetPoc();
                    newDpbFrame.m_viewIdx  = mfxU16(task.m_viewIdx);
                    newDpbFrame.m_longterm = true;
                    newDpbFrame.m_keyRef = task.m_keyReference;
                    currDpb.PushBack(newDpbFrame);
                    assert(currDpb.Size() <= m_video.mfx.NumRefFrame);

                    // the short-term insertion below must be skipped
                    currFrameIsAddedToDpb = true;
                }
            }
        }

        // if first field was a reference then entire frame is already in dpb
        if (!currFrameIsAddedToDpb)
        {
            // current frame is stored as a short-term reference; one more
            // iteration with refBase = 1 runs when m_storeRefBasePicFlag is set
            for (mfxU32 refBase = 0; refBase <= task.m_storeRefBasePicFlag; refBase++)
            {
                if (currDpb.Size() == m_video.mfx.NumRefFrame)
                {
                    // dpb is full: a reference has to be dropped first
                    DpbFrame * toRemove = std::min_element(currDpb.Begin(), currDpb.End(), HwUtils::OrderByFrameNumWrap(m_recons));
                    assert(toRemove != currDpb.End());
                    if (toRemove == currDpb.End())
                        return;

                    if (toRemove->m_longterm == 1)
                    {
                        // no short-term reference in dpb
                        // remove oldest long-term
                        toRemove = std::min_element(
                            currDpb.Begin(),
                            currDpb.End(),
                            HwUtils::OrderByDisplayOrder(m_recons));
                        assert(toRemove->m_longterm == 1); // must be longterm ref

                        Reconstruct const & ref = m_recons[toRemove->m_frameIdx];
                        marking.PushBack(MMCO_LT_TO_UNUSED, ref.m_longTermPicNum[0]);
                        usedLtIdx[ref.m_longTermFrameIdx] = 0;
                    }
                    else if (marking.mmco.Size() > 0)
                    {
                        // already have mmco commands, sliding window will not be invoked
                        // remove oldest short-term manually
                        Reconstruct const & ref = m_recons[toRemove->m_frameIdx];
                        marking.PushBack(MMCO_ST_TO_UNUSED, task.m_picNum[fieldId] - ref.m_picNum[0] - 1);
                    }
                    // otherwise no command is recorded: the entry is only erased
                    // from the dpb model below

                    currDpb.Erase(toRemove);
                }

                DpbFrame newDpbFrame;
                newDpbFrame.m_frameIdx = mfxU8(task.m_idxRecon);
                newDpbFrame.m_poc      = task.GetPoc();
                newDpbFrame.m_viewIdx  = mfxU16(task.m_viewIdx);
                newDpbFrame.m_longterm = 0;
                newDpbFrame.m_refBase  = (mfxU8)refBase;
                newDpbFrame.m_keyRef = task.m_keyReference;
                currDpb.PushBack(newDpbFrame);
                assert(currDpb.Size() <= m_video.mfx.NumRefFrame);
            }
        }
    }
}
1661 
1662 namespace
1663 {
DecideOnRefPicFlag(MfxVideoParam const & video,DdiTask & task)1664     void DecideOnRefPicFlag(MfxVideoParam const & video, DdiTask & task)
1665     {
1666         mfxU32 numLayers = video.calcParam.numTemporalLayer;
1667         if (numLayers > 1)
1668         {
1669             Pair<mfxU8> & ft = task.m_type;
1670 
1671             mfxU32 lastLayerScale =
1672                 video.calcParam.scale[numLayers - 1] /
1673                 video.calcParam.scale[numLayers - 2];
1674 
1675             if (((ft[0] | ft[1]) & MFX_FRAMETYPE_REF) &&    // one of fields is ref pic
1676                 numLayers > 1 &&                            // more than one temporal layer
1677                 lastLayerScale == 2 &&                      // highest layer is dyadic
1678                 (task.m_tidx + 1U) == numLayers)               // this is the highest layer
1679             {
1680                 ft[0] &= ~MFX_FRAMETYPE_REF;
1681                 ft[1] &= ~MFX_FRAMETYPE_REF;
1682             }
1683         }
1684     }
1685 
FindOldestRef(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,mfxU32 tidx)1686     Reconstruct const * FindOldestRef(
1687         std::vector<Reconstruct> const & recons,
1688         ArrayDpbFrame const &            dpb,
1689         mfxU32                           tidx)
1690     {
1691         Reconstruct const * oldest = 0;
1692 
1693         DpbFrame const * i = dpb.Begin();
1694         DpbFrame const * e = dpb.End();
1695 
1696         for (; i != e; ++i)
1697             if (recons[i->m_frameIdx].m_tidx == tidx)
1698                 oldest = &recons[i->m_frameIdx];
1699 
1700         for (; i != e; ++i)
1701             if (recons[i->m_frameIdx].m_tidx == tidx &&
1702                 Less(recons[i->m_frameIdx].m_frameOrder, oldest->m_frameOrder))
1703                 oldest = &recons[i->m_frameIdx];
1704 
1705         return oldest;
1706     }
1707 
CountRefs(std::vector<Reconstruct> const & recons,ArrayDpbFrame const & dpb,mfxU32 tidx)1708     mfxU32 CountRefs(
1709         std::vector<Reconstruct> const & recons,
1710         ArrayDpbFrame const &            dpb,
1711         mfxU32                           tidx)
1712     {
1713         mfxU32 counter = 0;
1714 
1715         DpbFrame const * i = dpb.Begin();
1716         DpbFrame const * e = dpb.End();
1717         for (; i != e; ++i)
1718             if (recons[i->m_frameIdx].m_tidx == tidx)
1719                 counter++;
1720         return counter;
1721     }
1722 
CreateAdditionalDpbCommands(MfxVideoParam const & video,std::vector<Reconstruct> const & recons,DdiTask & task)1723     void CreateAdditionalDpbCommands(
1724         MfxVideoParam const &            video,
1725         std::vector<Reconstruct> const & recons,
1726         DdiTask &                        task)
1727     {
1728         if (video.calcParam.numTemporalLayer == 0)
1729             return;
1730 
1731         task.m_internalListCtrlPresent = false;
1732         task.m_internalListCtrlHasPriority = true;
1733         task.m_internalListCtrlRefModLTR = false;
1734         InitExtBufHeader(task.m_internalListCtrl);
1735 
1736         mfxU32 numLayers  = video.calcParam.numTemporalLayer;
1737         mfxU32 refPicFlag = !!((task.m_type[0] | task.m_type[1]) & MFX_FRAMETYPE_REF);
1738 
1739         if (refPicFlag &&                                   // only ref frames occupy slot in dpb
1740             video.calcParam.tempScalabilityMode == 0 &&     // no long term refs in tempScalabilityMode
1741             numLayers > 1 && (task.m_tidx + 1U) != numLayers)  // no dpb commands for last-not-based temporal laeyr
1742         {
1743             // find oldest ref frame from the same temporal layer
1744             Reconstruct const * toRemove = FindOldestRef(recons, task.m_dpb[0], task.m_tidx);
1745 
1746             if (toRemove == 0 && task.m_dpb[0].Size() == video.mfx.NumRefFrame)
1747             {
1748                 // no ref frame from same layer but need to free dpb slot
1749                 // look for oldest frame from the highest layer
1750                 toRemove = FindOldestRef(recons, task.m_dpb[0], numLayers - 1);
1751                 assert(toRemove != 0);
1752             }
1753 
1754             if (video.mfx.GopRefDist > 1 &&                     // B frames present
1755                 task.m_tidx == 0 &&                             // base layer
1756                 CountRefs(recons, task.m_dpb[0], 0) < 2 &&      // 0 or 1 refs from base layer
1757                 task.m_dpb[0].Size() < video.mfx.NumRefFrame)   // dpb is not full yet
1758             {
1759                 // this is to keep 2 references from base layer for B frames at next layer
1760                 toRemove = 0;
1761             }
1762 
1763             if (toRemove)
1764             {
1765                 task.m_internalListCtrl.RejectedRefList[0].FrameOrder = toRemove->m_frameOrder;
1766                 task.m_internalListCtrl.RejectedRefList[0].PicStruct  = MFX_PICSTRUCT_PROGRESSIVE;
1767             }
1768 
1769             task.m_internalListCtrl.LongTermRefList[0].FrameOrder = task.m_frameOrder;
1770             task.m_internalListCtrl.LongTermRefList[0].PicStruct  = MFX_PICSTRUCT_PROGRESSIVE;
1771             task.m_internalListCtrlPresent = true;
1772         }
1773     }
1774 };
1775 
AssignTask(mfxEncodeCtrl * ctrl,mfxFrameSurface1 * surface,mfxBitstream * bs,DdiTask * & newTask,mfxU16 requiredFrameType)1776 mfxStatus TaskManager::AssignTask(
1777     mfxEncodeCtrl *    ctrl,
1778     mfxFrameSurface1 * surface,
1779     mfxBitstream *     bs,
1780     DdiTask *&         newTask,
1781     mfxU16             requiredFrameType)
1782 {
1783     UMC::AutomaticUMCMutex guard(m_mutex);
1784 
1785     if (m_pushed != 0)
1786     {
1787         // unexpected error happened while encoding previous task
1788         // need to revert state of TaskManager
1789         if (m_pushed->m_idxRecon != NO_INDEX)
1790             SetReconstructFree(m_recons[m_pushed->m_idxRecon]);
1791 
1792         m_pushed->m_bs = 0;
1793         m_pushed->SetFree(true);
1794     }
1795 
1796     DdiTask * toEncode = 0;
1797     if (m_video.mfx.EncodedOrder == 0)
1798     {
1799         if (surface != 0)
1800         {
1801             PairU8 frameType(requiredFrameType & 0xff, requiredFrameType >> 8);
1802             if(requiredFrameType == MFX_FRAMETYPE_UNKNOWN)
1803             {
1804                 frameType = m_frameTypeGen.Get();
1805 
1806                 if (ctrl)
1807                 {
1808                     if ((ctrl->FrameType & MFX_FRAMETYPE_IPB) == MFX_FRAMETYPE_I)
1809                     {
1810                         frameType = MakePair(MFX_FRAMETYPE_IREFIDR,
1811                             (ctrl->FrameType & MFX_FRAMETYPE_xI) ? MFX_FRAMETYPE_xIREF : MFX_FRAMETYPE_xPREF);
1812                     }
1813                     else if (mfxExtVppAuxData * extVpp = GetExtBuffer(*ctrl))
1814                     {
1815                         if (extVpp->SceneChangeRate > 90)
1816                             frameType = MakePair(MFX_FRAMETYPE_IREFIDR, MFX_FRAMETYPE_xPREF);
1817                     }
1818                 }
1819             }
1820 
1821             if ((m_pushed = PushNewTask(surface, ctrl, frameType, m_frameOrder)) == 0)
1822             {
1823                 return MFX_WRN_DEVICE_BUSY;
1824             }
1825         }
1826 
1827         // find oldest frame to encode
1828         toEncode = FindFrameToEncode();
1829 
1830         if (toEncode == 0 && surface == 0)
1831         {
1832             // it is possible that all buffered frames are B frames
1833             // so that none of them has L1 reference
1834             if (m_video.mfx.GopOptFlag & MFX_GOP_STRICT)
1835                 toEncode = SelectNextBFrameFromTail(); // find first B frame from buffer and encode it w/o future refs
1836             else
1837             {
1838                 SwitchLastB2P();
1839                 toEncode = FindFrameToEncode();
1840             }
1841         }
1842 
1843         if (toEncode == 0)
1844         {
1845             if (surface != 0)
1846             {
1847                 // change state here, but it is ok
1848                 // because when there is no task to be confirmed
1849                 m_frameTypeGen.Next();
1850                 m_frameOrder++;
1851                 m_core->IncreaseReference(&surface->Data);
1852                 m_pushed = 0;
1853             }
1854 
1855             return MFX_ERR_MORE_DATA; // nothing to encode
1856         }
1857     }else
1858     {
1859         assert(surface);
1860         assert(ctrl);
1861         assert(ctrl->FrameType & MFX_FRAMETYPE_IPB);
1862 
1863         PairU8 frameType = MakePair(ctrl->FrameType & 0xff, ctrl->FrameType >> 8);
1864         if ((frameType[1] & MFX_FRAMETYPE_xIPB) == 0)
1865             frameType = ExtendFrameType(ctrl->FrameType);
1866 
1867         if ((m_pushed = PushNewTask(surface, ctrl, frameType, surface->Data.FrameOrder)) == 0)
1868             return MFX_WRN_DEVICE_BUSY;
1869 
1870         // no reordering in EncodedOrder
1871         toEncode = m_pushed;
1872     }
1873 
1874     mfxU32 ffid      = toEncode->GetFirstField();
1875     mfxU32 picStruct = toEncode->GetPicStructForEncode();
1876 
1877     toEncode->m_pushed        = m_pushed;
1878     toEncode->m_idrPicId      = m_idrPicId;
1879     toEncode->m_frameNum      = m_frameNum;
1880     toEncode->m_frameOrderIdr = (toEncode->m_type[ffid] & MFX_FRAMETYPE_IDR) ? toEncode->m_frameOrder : m_frameOrderIdr;
1881     toEncode->m_frameOrderI   = (toEncode->m_type[ffid] & MFX_FRAMETYPE_I)   ? toEncode->m_frameOrder : m_frameOrderI;
1882 
1883     toEncode->m_addRepackSize[ffid] = toEncode->m_addRepackSize[!ffid] = 0; // zero compensative padding size
1884 
1885     toEncode->m_picNum.top = toEncode->m_picNum.bot = (picStruct == MFX_PICSTRUCT_PROGRESSIVE)
1886         ? m_frameNum
1887         : m_frameNum * 2 + 1;
1888 
1889     toEncode->m_storeRefBasePicFlag = 0;
1890 
1891     mfxU8 frameType = toEncode->GetFrameType();
1892     assert(frameType);
1893 
1894     if (frameType & MFX_FRAMETYPE_IDR)
1895         toEncode->m_frameNum = 0;
1896 
1897     toEncode->m_dpbOutputDelay    = 2 * (toEncode->m_frameOrder + m_numReorderFrames - m_cpbRemoval);
1898     toEncode->m_cpbRemoval[ ffid] = 2 * (m_cpbRemoval - m_cpbRemovalBufferingPeriod);
1899     toEncode->m_cpbRemoval[!ffid] = (toEncode->m_type[ffid] & MFX_FRAMETYPE_IDR)
1900         ? 1
1901         : toEncode->m_cpbRemoval[ffid] + 1;
1902 
1903     toEncode->m_bs = bs;
1904 
1905     if (m_bitstreams.size() > 0)
1906     {
1907         std::vector<Surface>::iterator it = std::find_if(
1908             m_bitstreams.begin(),
1909             m_bitstreams.end(),
1910             std::mem_fun_ref(&Surface::IsFree));
1911         if (it == m_bitstreams.end())
1912             return MFX_WRN_DEVICE_BUSY;
1913 
1914         toEncode->m_idxBs[0] = mfxU32(it - m_bitstreams.begin());
1915 
1916         if (picStruct != MFX_PICSTRUCT_PROGRESSIVE)
1917         {
1918             std::vector<Surface>::iterator it2 = std::find_if(
1919                 it + 1,
1920                 m_bitstreams.end(),
1921                 std::mem_fun_ref(&Surface::IsFree));
1922             if (it2 == m_bitstreams.end())
1923                 return MFX_WRN_DEVICE_BUSY;
1924 
1925             toEncode->m_idxBs[1] = mfxU32(it2 - m_bitstreams.begin());
1926         }
1927     }
1928 
1929     if (m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
1930     {
1931         toEncode->m_idx = FindFreeSurface(m_raws);
1932         MFX_CHECK(toEncode->m_idx != NO_INDEX, MFX_WRN_DEVICE_BUSY);
1933     }
1934 
1935     toEncode->m_idxRecon = FindFreeSurface(m_recons);
1936     MFX_CHECK(toEncode->m_idxRecon != NO_INDEX, MFX_WRN_DEVICE_BUSY);
1937 
1938     toEncode->m_idxReconOffset = CalcNumSurfRecon(m_video) * m_viewIdx;
1939     toEncode->m_idxBsOffset    = CalcNumSurfBitstream(m_video) * m_viewIdx;
1940 
1941     mfxExtAvcTemporalLayers const * extTemp = GetExtBuffer(m_video);
1942     toEncode->m_tidx = CalcTemporalLayerIndex(m_video, toEncode->m_frameOrder - toEncode->m_frameOrderIdr);
1943     toEncode->m_tid  = m_video.calcParam.tid[toEncode->m_tidx];
1944     toEncode->m_pid  = toEncode->m_tidx + extTemp->BaseLayerPID;
1945 
1946     toEncode->m_decRefPicMrkRep[ffid] = m_decRefPicMrkRep;
1947     toEncode->m_dpb[ffid]             = m_dpb;
1948     BuildRefPicLists(*toEncode, ffid);
1949     ModifyRefPicLists(*toEncode, ffid);
1950 
1951     DecideOnRefPicFlag(m_video, *toEncode); // check for temporal layers
1952     CreateAdditionalDpbCommands(m_video, m_recons, *toEncode); // for svc temporal layers
1953 
1954     mfxExtCodingOption2 const * extOpt2 = GetExtBuffer(m_video);
1955     if (toEncode->m_ctrl.SkipFrame != 0)
1956     {
1957         toEncode->m_ctrl.SkipFrame = (extOpt2->SkipFrame) ? 1 : 0;
1958 
1959         if (toEncode->SkipFlag() != 0)
1960         {
1961             toEncode->m_type.top &= ~MFX_FRAMETYPE_REF;
1962             toEncode->m_type.bot &= ~MFX_FRAMETYPE_REF;
1963         }
1964     }
1965 
1966     mfxExtCodingOptionDDI const * extDdi = GetExtBuffer(m_video);
1967     toEncode->m_subMbPartitionAllowed[0] = CheckSubMbPartition(extDdi, toEncode->m_type[0]);
1968     toEncode->m_subMbPartitionAllowed[1] = CheckSubMbPartition(extDdi, toEncode->m_type[1]);
1969 
1970 
1971     mfxExtCodingOption const * extOpt = GetExtBuffer(m_video);
1972     toEncode->m_insertSps[ ffid] = (toEncode->m_type[ffid] & MFX_FRAMETYPE_I) ? 1 : 0;
1973     toEncode->m_insertSps[!ffid] = 0;
1974     toEncode->m_insertAud[ ffid] = IsOn(extOpt->AUDelimiter);
1975     toEncode->m_insertAud[!ffid] = IsOn(extOpt->AUDelimiter);
1976 
1977     mfxExtCodingOption3 const * extOpt3 = GetExtBuffer(m_video);
1978     toEncode->m_numMbPerSlice = extOpt2->NumMbPerSlice;
1979     toEncode->m_numSlice[ffid] = (toEncode->m_type[ffid] & MFX_FRAMETYPE_I) ? extOpt3->NumSliceI :
1980         (toEncode->m_type[ffid] & MFX_FRAMETYPE_P) ? extOpt3->NumSliceP : extOpt3->NumSliceB;
1981     toEncode->m_numSlice[!ffid] = (toEncode->m_type[!ffid] & MFX_FRAMETYPE_I) ? extOpt3->NumSliceI :
1982         (toEncode->m_type[!ffid] & MFX_FRAMETYPE_P) ? extOpt3->NumSliceP : extOpt3->NumSliceB;
1983 
1984     if (m_video.calcParam.tempScalabilityMode)
1985     {
1986         toEncode->m_insertPps[ ffid] = toEncode->m_insertSps[ ffid];
1987         toEncode->m_insertPps[!ffid] = toEncode->m_insertSps[!ffid];
1988 
1989         mfxU32 idrFlag = !!(toEncode->m_type[ ffid] & MFX_FRAMETYPE_IDR);
1990         mfxU32 refFlag = !!(toEncode->m_type[ ffid] & MFX_FRAMETYPE_REF);
1991         toEncode->m_nalRefIdc[ ffid] = idrFlag ? 3 : (refFlag ? 2 : 0);
1992 
1993         idrFlag = !!(toEncode->m_type[!ffid] & MFX_FRAMETYPE_IDR);
1994         refFlag = !!(toEncode->m_type[!ffid] & MFX_FRAMETYPE_REF);
1995         toEncode->m_nalRefIdc[!ffid] = idrFlag ? 3 : (refFlag ? 2 : 0);
1996     }
1997     else
1998     {
1999         toEncode->m_insertPps[ ffid] = toEncode->m_insertSps[ ffid] || IsOn(extOpt2->RepeatPPS);
2000         toEncode->m_insertPps[!ffid] = toEncode->m_insertSps[!ffid] || IsOn(extOpt2->RepeatPPS);
2001         toEncode->m_nalRefIdc[ ffid] = !!(toEncode->m_type[ ffid] & MFX_FRAMETYPE_REF);
2002         toEncode->m_nalRefIdc[!ffid] = !!(toEncode->m_type[!ffid] & MFX_FRAMETYPE_REF);
2003 
2004     }
2005 
2006     toEncode->m_cqpValue[0] = GetQpValue(*toEncode, m_video, toEncode->m_type[0]);
2007     toEncode->m_cqpValue[1] = GetQpValue(*toEncode, m_video, toEncode->m_type[1]);
2008 
2009     mfxExtMVCSeqDesc * extMvc = GetExtBuffer(m_video);
2010     toEncode->m_statusReportNumber[0] = 2 * (m_cpbRemoval * extMvc->NumView + m_viewIdx);
2011     toEncode->m_statusReportNumber[1] = 2 * (m_cpbRemoval * extMvc->NumView + m_viewIdx) + 1;
2012 
2013     toEncode->m_viewIdx = m_viewIdx;
2014 
2015     if (picStruct == MFX_PICSTRUCT_PROGRESSIVE)
2016     {
2017         UpdateDpb(*toEncode, ffid, toEncode->m_dpbPostEncoding);
2018 
2019         m_recons[toEncode->m_idxRecon] = *toEncode;
2020         m_recons[toEncode->m_idxRecon].m_reference[ffid] = (toEncode->m_type[ ffid] & MFX_FRAMETYPE_REF) != 0;
2021         m_recons[toEncode->m_idxRecon].SetFree(true); // recon surface will be marked used in ConfirmTask()
2022     }
2023     else
2024     {
2025         toEncode->m_decRefPicMrkRep[!ffid].presentFlag =
2026             (toEncode->m_type[ffid] & MFX_FRAMETYPE_IDR) ||
2027             (toEncode->m_decRefPicMrk[ffid].mmco.Size() > 0);
2028 
2029         toEncode->m_decRefPicMrkRep[!ffid].original_idr_flag          = (toEncode->m_type[ffid] & MFX_FRAMETYPE_IDR) ? 1 : 0;
2030         toEncode->m_decRefPicMrkRep[!ffid].original_frame_num         = (toEncode->m_frameNum);
2031         toEncode->m_decRefPicMrkRep[!ffid].original_field_pic_flag    = (picStruct == MFX_PICSTRUCT_PROGRESSIVE) ? 0 : 1;
2032         toEncode->m_decRefPicMrkRep[!ffid].original_bottom_field_flag = (ffid == BFIELD) ? 1 : 0;
2033         toEncode->m_decRefPicMrkRep[!ffid].dec_ref_pic_marking        = toEncode->m_decRefPicMrk[ffid];
2034 
2035         UpdateDpb(*toEncode, ffid, toEncode->m_dpb[!ffid]);
2036         m_recons[toEncode->m_idxRecon] = *toEncode;
2037         m_recons[toEncode->m_idxRecon].m_reference[ ffid] = (toEncode->m_type[ ffid] & MFX_FRAMETYPE_REF) != 0;
2038 
2039         BuildRefPicLists(*toEncode, !ffid);
2040         ModifyRefPicLists(*toEncode, !ffid);
2041 
2042         UpdateDpb(*toEncode, !ffid, toEncode->m_dpbPostEncoding);
2043         m_recons[toEncode->m_idxRecon].m_reference[!ffid] = (toEncode->m_type[!ffid] & MFX_FRAMETYPE_REF) != 0;
2044         m_recons[toEncode->m_idxRecon].SetFree(true); // recon surface will be marked used in ConfirmTask()
2045     }
2046 
2047     newTask = toEncode;
2048     return MFX_ERR_NONE;
2049 }
2050 
PushNewTask(mfxFrameSurface1 * surface,mfxEncodeCtrl * ctrl,PairU8 type,mfxU32 frameOrder)2051 DdiTask* TaskManager::PushNewTask(
2052     mfxFrameSurface1 * surface,
2053     mfxEncodeCtrl *    ctrl,
2054     PairU8             type,
2055     mfxU32             frameOrder)
2056 {
2057     assert(surface);
2058 
2059     mfxU32 insertAt = FindFreeSurface(m_tasks);
2060     if (insertAt == NO_INDEX)
2061         return 0;
2062 
2063     DdiTask task;
2064 
2065     task.m_picStruct = GetPicStruct(m_video, surface->Info.PicStruct);
2066 
2067     mfxU32 ffid = task.GetFirstField();
2068 
2069     task.m_type[ ffid]   = type[0];
2070     task.m_type[!ffid]   = type[1];
2071     task.m_frameOrder    = frameOrder;
2072     task.m_yuv           = surface;
2073     task.m_extFrameTag   = surface->Data.FrameOrder; // mark task with external FrameOrder
2074 
2075     if (ctrl)
2076         task.m_ctrl = *ctrl;
2077 
2078     if (type[0] & MFX_FRAMETYPE_B)
2079     {
2080         task.m_loc = m_frameTypeGen.GetBiFrameLocation();
2081 
2082         task.m_type[ ffid] |= task.m_loc.refFrameFlag;
2083         task.m_type[!ffid] |= task.m_loc.refFrameFlag;
2084     }
2085 
2086     task.SetFree(false);
2087     m_tasks[insertAt] = task; // change state
2088                               // if task isn't confirmed by ConfirmTask
2089                               // it will be popped from list at next AssignTask
2090 
2091     return &m_tasks[insertAt];
2092 }
2093 
2094 namespace
2095 {
FindByExtFrameTag(mfxU8 * begin,mfxU8 * end,ArrayDpbFrame const & dpb,std::vector<Reconstruct> const & recons,mfxU32 frameTag,mfxU32 picStruct)2096     mfxU8 * FindByExtFrameTag(
2097         mfxU8 *                          begin,
2098         mfxU8 *                          end,
2099         ArrayDpbFrame const &            dpb,
2100         std::vector<Reconstruct> const & recons,
2101         mfxU32                           frameTag,
2102         mfxU32                           picStruct)
2103     {
2104         mfxU8 fieldId = picStruct == MFX_PICSTRUCT_FIELD_BFF ? 0x80 : 0;
2105 
2106         for (; begin != end; ++begin)
2107         {
2108             if (recons[dpb[*begin & 0x7f].m_frameIdx].m_extFrameTag == frameTag)
2109             {
2110                 if (picStruct == MFX_PICSTRUCT_PROGRESSIVE || fieldId == (*begin & 0x80))
2111                     break;
2112             }
2113         }
2114 
2115         return begin;
2116     }
2117 
RotateRight(mfxU8 * begin,mfxU8 * end)2118     void RotateRight(mfxU8 * begin, mfxU8 * end)
2119     {
2120         if (begin != end)
2121         {
2122             mfxU8 mostRight = *--end;
2123 
2124             for (; begin != end; --end)
2125                 *end = *(end - 1);
2126 
2127             *begin = mostRight;
2128         }
2129     }
2130 
RotateLeft(mfxU8 * begin,mfxU8 * end)2131     void RotateLeft(mfxU8 * begin, mfxU8 * end)
2132     {
2133         if (begin != end)
2134         {
2135             --end;
2136             mfxU8 mostLeft = *begin;
2137 
2138             for (; begin != end; ++begin)
2139                 *begin = *(begin + 1);
2140 
2141             *end = mostLeft;
2142         }
2143     }
2144 
ReorderRefPicList(ArrayU8x33 & refPicList,ArrayDpbFrame const & dpb,std::vector<Reconstruct> const & recons,mfxExtAVCRefListCtrl const & ctrl,mfxU32 numActiveRef,mfxU32)2145     void ReorderRefPicList( // PreserveOrderInPreferredRefList
2146         ArrayU8x33 &                     refPicList,
2147         ArrayDpbFrame const &            dpb,
2148         std::vector<Reconstruct> const & recons,
2149         mfxExtAVCRefListCtrl const &     ctrl,
2150         mfxU32                           numActiveRef,
2151         mfxU32                           /*curPicStruct*/)
2152     {
2153         mfxU8 * begin = refPicList.Begin();
2154         mfxU8 * end   = refPicList.End();
2155 
2156         for (mfxU32 i = 0; i < 32 && ctrl.PreferredRefList[i].FrameOrder != 0xffffffff; i++)
2157         {
2158             mfxU8 * ref = FindByExtFrameTag(
2159                 begin,
2160                 end,
2161                 dpb,
2162                 recons,
2163                 ctrl.PreferredRefList[i].FrameOrder,
2164                 ctrl.PreferredRefList[i].PicStruct);
2165 
2166             if (ref != end)
2167             {
2168                 RotateRight(begin, ref + 1);
2169                 begin++;
2170             }
2171         }
2172 
2173         for (mfxU32 i = 0; i < 16 && ctrl.RejectedRefList[i].FrameOrder != 0xffffffff; i++)
2174         {
2175             mfxU8 * ref = FindByExtFrameTag(
2176                 begin,
2177                 end,
2178                 dpb,
2179                 recons,
2180                 ctrl.RejectedRefList[i].FrameOrder,
2181                 ctrl.RejectedRefList[i].PicStruct);
2182 
2183             if (ref != end)
2184             {
2185                 RotateLeft(ref, end);
2186                 --end;
2187             }
2188         }
2189 
2190         refPicList.Resize((mfxU32)(end - refPicList.Begin()));
2191         if (numActiveRef > 0 && refPicList.Size() > numActiveRef)
2192             refPicList.Resize(numActiveRef);
2193     }
2194 };
2195 
ModifyRefPicLists(DdiTask & task,mfxU32 fieldId) const2196 void TaskManager::ModifyRefPicLists(
2197     DdiTask & task,
2198     mfxU32    fieldId) const
2199 {
2200     ArrayDpbFrame const & dpb   = task.m_dpb[fieldId];
2201     ArrayU8x33 &          list0 = task.m_list0[fieldId];
2202     ArrayU8x33 &          list1 = task.m_list1[fieldId];
2203     mfxU32                ps    = task.GetPicStructForEncode();
2204     ArrayRefListMod &     mod0  = task.m_refPicList0Mod[fieldId];
2205     ArrayRefListMod &     mod1  = task.m_refPicList1Mod[fieldId];
2206 
2207     ArrayU8x33 initList0 = task.m_list0[fieldId];
2208     ArrayU8x33 initList1 = task.m_list1[fieldId];
2209     mfxI32     curPicNum = task.m_picNum[fieldId];
2210 
2211     if ((m_video.mfx.GopOptFlag & MFX_GOP_CLOSED) || Less(task.m_frameOrderI, task.m_frameOrder))
2212     {
2213         // remove references to pictures prior to first I frame in decoding order
2214         // if gop is closed do it for all frames in gop
2215         // if gop is open do it for pictures subsequent to first I frame in display order
2216 
2217         mfxU32 firstIntraFramePoc = 2 * (task.m_frameOrderI - task.m_frameOrderIdr);
2218 
2219         list0.Erase(
2220             std::remove_if(list0.Begin(), list0.End(), LogicalAnd(
2221                 RefPocIsLessThan(m_recons, dpb, firstIntraFramePoc),
2222                 RefIsShortTerm(m_recons, dpb))),
2223             list0.End());
2224 
2225         list1.Erase(
2226             std::remove_if(list1.Begin(), list1.End(), LogicalAnd(
2227                 RefPocIsLessThan(m_recons, dpb, firstIntraFramePoc),
2228                 RefIsShortTerm(m_recons, dpb))),
2229             list1.End());
2230     }
2231 
2232     mfxExtCodingOptionDDI const * extDdi = GetExtBuffer(m_video);
2233 
2234     if (mfxExtAVCRefListCtrl * ctrl = (m_video.calcParam.numTemporalLayer == 0) ? GetExtBuffer(task.m_ctrl) : (mfxExtAVCRefListCtrl *)0)
2235     {
2236         mfxU32 numActiveRefL0 = (task.m_type[fieldId] & MFX_FRAMETYPE_P)
2237             ? extDdi->NumActiveRefP
2238             : extDdi->NumActiveRefBL0;
2239         if (task.m_type[fieldId] & MFX_FRAMETYPE_PB)
2240         {
2241             numActiveRefL0 = ctrl->NumRefIdxL0Active ? std::min<mfxU32>(ctrl->NumRefIdxL0Active,numActiveRefL0) : numActiveRefL0;
2242             ReorderRefPicList(list0, dpb, m_recons, *ctrl, numActiveRefL0, ps);
2243         }
2244 
2245         if (task.m_type[fieldId] & MFX_FRAMETYPE_B)
2246         {
2247             mfxU32 numActiveRefL1 = ctrl->NumRefIdxL1Active ? std::min(ctrl->NumRefIdxL1Active,extDdi->NumActiveRefBL1) : extDdi->NumActiveRefBL1;
2248             ReorderRefPicList(list1, dpb, m_recons, *ctrl, numActiveRefL1, ps);
2249         }
2250     }
2251     else
2252     {
2253         // prepare ref list for P-field of I/P field pair
2254         // swap 1st and 2nd entries of L0 ref pic list to use I-field of I/P pair as reference for P-field
2255         mfxU32 ffid = task.GetFirstField();
2256         if ((task.m_type[ ffid] & MFX_FRAMETYPE_I) &&
2257             (task.m_type[!ffid] & MFX_FRAMETYPE_P))
2258         {
2259             if (ps != MFX_PICSTRUCT_PROGRESSIVE && fieldId != ffid && list0.Size() > 1)
2260                 std::swap(list0[0], list0[1]);
2261         }
2262         else if (task.m_type[fieldId] & MFX_FRAMETYPE_B)
2263         {
2264             mfxU8 save0 = list0[0];
2265             mfxU8 save1 = list1[0];
2266 
2267             list0.Erase(
2268                 std::remove_if(list0.Begin(), list0.End(),
2269                     RefPocIsGreaterThan(m_recons, dpb, task.GetPoc(fieldId))),
2270                 list0.End());
2271 
2272             list1.Erase(
2273                 std::remove_if(list1.Begin(), list1.End(),
2274                     RefPocIsLessThan(m_recons, dpb, task.GetPoc(fieldId))),
2275                 list1.End());
2276 
2277             // keep at least one ref pic in lists
2278             if (list0.Size() == 0)
2279                 list0.PushBack(save0);
2280             if (list1.Size() == 0)
2281                 list1.PushBack(save1);
2282         }
2283 
2284         if (m_video.calcParam.numTemporalLayer > 0)
2285         {
2286             list0.Erase(
2287                 std::remove_if(
2288                     list0.Begin(),
2289                     list0.End(),
2290                     RefIsFromHigherTemporalLayer(m_recons, dpb, task.m_tid)),
2291                 list0.End());
2292 
2293             list1.Erase(
2294                 std::remove_if(
2295                     list1.Begin(),
2296                     list1.End(),
2297                     RefIsFromHigherTemporalLayer(m_recons, dpb, task.m_tid)),
2298                 list1.End());
2299 
2300             std::sort(list0.Begin(), list0.End(), RefPocIsGreater(m_recons, dpb));
2301             std::sort(list1.Begin(), list1.End(), RefPocIsLess(m_recons, dpb));
2302 
2303             if (m_video.calcParam.tempScalabilityMode)
2304             { // cut lists to 1 element for tempScalabilityMode
2305                 list0.Resize(std::min(list0.Size(), 1u));
2306                 list1.Resize(std::min(list1.Size(), 1u));
2307             }
2308         }
2309 
2310         mfxU32 numActiveRefL1 = extDdi->NumActiveRefBL1;
2311         mfxU32 numActiveRefL0 = (task.m_type[fieldId] & MFX_FRAMETYPE_P)
2312             ? extDdi->NumActiveRefP
2313             : extDdi->NumActiveRefBL0;
2314 
2315         if (numActiveRefL0 > 0 && list0.Size() > numActiveRefL0)
2316             list0.Resize(numActiveRefL0);
2317         if (numActiveRefL1 > 0 && list1.Size() > numActiveRefL1)
2318             list1.Resize(numActiveRefL1);
2319 
2320 #ifdef MFX_ENABLE_SVC_VIDEO_ENCODE_HW
2321         mfxExtSVCSeqDesc const * extSvc = GetExtBuffer(m_video);
2322         if (mfxU32 refBaseDist = extSvc->RefBaseDist)
2323         {
2324             mfxU32 prevKeyPicPoc = ((task.m_frameOrder - 1) / refBaseDist * refBaseDist - task.m_frameOrderIdr) * 2;
2325 
2326             list0.Erase(
2327                 std::remove_if(list0.Begin(), list0.End(), LogicalAnd(
2328                     RefPocIsLessThan(m_recons, dpb, prevKeyPicPoc),
2329                     RefIsShortTerm(m_recons, dpb))),
2330                 list0.End());
2331 
2332             list1.Erase(
2333                 std::remove_if(list1.Begin(), list1.End(), LogicalAnd(
2334                     RefPocIsLessThan(m_recons, dpb, prevKeyPicPoc),
2335                     RefIsShortTerm(m_recons, dpb))),
2336                 list1.End());
2337         }
2338 #endif
2339     }
2340 
2341     initList0.Resize(list0.Size());
2342     initList1.Resize(list1.Size());
2343 
2344     bool noLongTermInList0 = LongTermInList(m_recons, dpb, initList0);
2345     bool noLongTermInList1 = LongTermInList(m_recons, dpb, initList1);
2346 
2347     mod0 = CreateRefListMod(dpb, m_recons, initList0, list0, task.m_viewIdx, curPicNum, noLongTermInList0);
2348     mod1 = CreateRefListMod(dpb, m_recons, initList1, list1, task.m_viewIdx, curPicNum, noLongTermInList1);
2349 }
2350 
// Commits a task inside the manager: updates the manager's running counters
// (frame_num, idr_pic_id, cpb removal counter, frame orders), marks the raw,
// bitstream and reconstructed slots referenced by the task as busy, stores the
// task's post-encoding DPB for the next task and remembers the
// dec_ref_pic_marking of the last field for the repetition SEI.
// The whole call runs under m_mutex.
void TaskManager::ConfirmTask(DdiTask & task)
{
    UMC::AutomaticUMCMutex guard(m_mutex);

    mfxU32 ps = task.GetPicStructForEncode();

    if (task.GetFrameType() & MFX_FRAMETYPE_IDR)
    {
        // IDR restarts frame_num numbering and gets a new idr_pic_id
        m_frameNum = 0;
        m_idrPicId++;

        // sei message with buffering period will be sent with IDR picture
        // it will reset cpb/dpb delays
        m_cpbRemovalBufferingPeriod = m_cpbRemoval;
    }

    m_cpbRemoval++; // is incremented every frame (unlike frame_num)

    // frame_num advances (modulo m_frameNumMax) only after a reference picture
    if (task.GetFrameType() & MFX_FRAMETYPE_REF || task.m_nalRefIdc[0])
    {
        m_frameNum = (m_frameNum + 1) % m_frameNumMax;
    }

    // remember the frame order of the most recent IDR ...
    if (task.GetFrameType() & MFX_FRAMETYPE_IDR)
    {
        m_frameOrderIdr = task.m_frameOrder;
    }

    // ... and of the most recent I picture
    if (task.GetFrameType() & MFX_FRAMETYPE_I)
    {
        m_frameOrderI = task.m_frameOrder;
    }

    // a frame was pushed together with this task: advance the frame type
    // generator and the input frame counter, and take a reference on its surface
    if (task.m_pushed != 0)
    {
        if (task.m_pushed->GetFrameType() == MFX_FRAMETYPE_IREFIDR)
            m_frameTypeGen.Init(m_video); // idr starts new gop

        m_frameTypeGen.Next();
        m_frameOrder++;
        m_core->IncreaseReference(&task.m_pushed->m_yuv->Data);
    }

    // raw slots are tracked only for system memory input
    if (m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY && task.m_idx != NO_INDEX)
    {
        m_raws[task.m_idx].SetFree(false);
    }

    if (task.m_idxBs.top != NO_INDEX)
    {
        m_bitstreams[task.m_idxBs.top].SetFree(false);
    }

    // a second bitstream slot is expected only for non-progressive pictures
    if (task.m_idxBs.bot != NO_INDEX)
    {
        assert(ps != MFX_PICSTRUCT_PROGRESSIVE);
        m_bitstreams[task.m_idxBs.bot].SetFree(false);
    }

    // occupy the reconstructed slot and record per-field reference status
    if (task.m_idxRecon != NO_INDEX)
    {
        m_recons[task.m_idxRecon].SetFree(false);
        m_recons[task.m_idxRecon].m_reference[TFIELD] = (task.m_type[TFIELD] & MFX_FRAMETYPE_REF) != 0;
        m_recons[task.m_idxRecon].m_reference[BFIELD] = (task.m_type[BFIELD] & MFX_FRAMETYPE_REF) != 0;
    }

    // task already has a dpb with sliding window and adaptive dec_ref_pic_marking commands applied
    // store it for next task
    m_dpb = task.m_dpbPostEncoding;

    // store dec_ref_pic_marking info of last picture for repetition sei
    // (for progressive pictures index 0 is used, otherwise the field opposite to the first field)
    mfxU32 lastField = (ps == MFX_PICSTRUCT_PROGRESSIVE) ? 0 : !task.GetFirstField();

    // repetition is needed when the last field is IDR or carries mmco commands
    m_decRefPicMrkRep.presentFlag =
        (task.m_type[lastField] & MFX_FRAMETYPE_IDR) ||
        (task.m_decRefPicMrk[lastField].mmco.Size() > 0);

    m_decRefPicMrkRep.original_idr_flag          = (task.m_type[lastField] & MFX_FRAMETYPE_IDR) ? 1 : 0;
    m_decRefPicMrkRep.original_frame_num         = (task.m_frameNum);
    m_decRefPicMrkRep.original_field_pic_flag    = (ps == MFX_PICSTRUCT_PROGRESSIVE) ? 0 : 1;
    m_decRefPicMrkRep.original_bottom_field_flag = (lastField == BFIELD) ? 1 : 0;
    m_decRefPicMrkRep.dec_ref_pic_marking        = task.m_decRefPicMrk[lastField];

    // the pushed frame has been consumed; one more frame is now cached in the manager
    m_pushed = 0;
    m_stat.NumCachedFrame++;
}
2437 
2438 
// Releases the resources of a finished task: frames that left the DPB during
// this task, the task's bitstream slots, its input surface and (for
// non-reference pictures) its reconstructed slot. Updates encoding statistics
// and marks the task itself as free. The whole call runs under m_mutex.
void TaskManager::CompleteTask(DdiTask & task)
{
    UMC::AutomaticUMCMutex guard(m_mutex);

    mfxExtCodingOptionDDI const * optDdi = GetExtBuffer(m_video);

    // the task is expected to be one of the tasks owned by this manager
    assert(std::find_if(m_tasks.begin(), m_tasks.end(), FindByFrameOrder(task.m_frameOrder)) != m_tasks.end());

    // every frame present in the DPB before the first field but absent from
    // the post-encoding DPB is no longer needed and can be released
    ArrayDpbFrame const & iniDpb = task.m_dpb[task.GetFirstField()];
    ArrayDpbFrame const & finDpb = task.m_dpbPostEncoding;
    for (mfxU32 i = 0; i < iniDpb.Size(); i++)
    {
        if (std::find(finDpb.Begin(), finDpb.End(), iniDpb[i]) == finDpb.End())
        {
            SetReconstructFree(m_recons[iniDpb[i].m_frameIdx]);

            // with RefRaw on, the raw input of the dropped frame is released here too
            // NOTE(review): m_raws is indexed with the recon frame index - presumably
            // raw and recon slots share indices in this mode; confirm
            if (IsOn(optDdi->RefRaw))
            {
                if (m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
                    m_raws[iniDpb[i].m_frameIdx].SetFree(true);
                else
                    m_core->DecreaseReference(&m_recons[iniDpb[i].m_frameIdx].m_yuv->Data);
            }
        }
    }

    if (task.m_idxBs[0] != NO_INDEX)
    {
        m_bitstreams[task.m_idxBs[0]].SetFree(true);
    }

    // a second bitstream slot is expected only for non-progressive pictures
    if (task.m_idxBs[1] != NO_INDEX)
    {
        assert((task.GetPicStructForEncode() & MFX_PICSTRUCT_PROGRESSIVE) == 0);
        m_bitstreams[task.m_idxBs[1]].SetFree(true);
    }

    if (IsOff(optDdi->RefRaw))
    {
        // raw input is not used as reference: release it unconditionally
        m_core->DecreaseReference(&task.m_yuv->Data);

        if (task.m_idx != NO_INDEX && m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
            m_raws[task.m_idx].SetFree(true);
    }
    else
    {
        if (m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
            // when input surface is in system memory
            // we can release it right after task is completed
            // even if raw surfaces are used as reference
            m_core->DecreaseReference(&task.m_yuv->Data);

        // non-reference picture: its raw input is not kept for referencing
        // NOTE(review): task.m_idxRecon and task.m_idx are not compared against
        // NO_INDEX in this branch, unlike the checks elsewhere in this function - confirm
        // NOTE(review): for system memory input DecreaseReference is called a second
        // time on the same surface here - confirm this is intended
        if (!m_recons[task.m_idxRecon].m_reference[0] && !m_recons[task.m_idxRecon].m_reference[1])
        {
            m_core->DecreaseReference(&task.m_yuv->Data);
            if (m_video.IOPattern == MFX_IOPATTERN_IN_SYSTEM_MEMORY)
                m_raws[task.m_idx].SetFree(true);
        }
    }

    // reconstructed slot of a picture where neither field is a reference can be reused
    if (task.m_idxRecon != NO_INDEX)
    {
        if (!m_recons[task.m_idxRecon].m_reference[0] && !m_recons[task.m_idxRecon].m_reference[1])
            SetReconstructFree(m_recons[task.m_idxRecon]);
    }

    // statistics: one frame left the cache; add its coded size (both fields) in bits
    m_stat.NumCachedFrame--;
    m_stat.NumFrame++;
    m_stat.NumBit += 8 * (task.m_bsDataLength[0] + task.m_bsDataLength[1]);

    task.m_bs = 0;
    task.SetFree(true);
}
2512 
CountRunningTasks()2513 mfxU32 TaskManager::CountRunningTasks()
2514 {
2515     UMC::AutomaticUMCMutex guard(m_mutex);
2516 
2517     mfxU32 count = 0;
2518     for (size_t i = 0; i < m_bitstreams.size(); i++)
2519         if (!m_bitstreams[i].IsFree())
2520             ++count;
2521 
2522     return count;
2523 }
2524 
2525 
GetNalUnit(mfxU8 * begin,mfxU8 * end)2526 NalUnit MfxHwH264Encode::GetNalUnit(mfxU8 * begin, mfxU8 * end)
2527 {
2528     for (; begin < end - 5; ++begin)
2529     {
2530         if ((begin[0] == 0 && begin[1] == 0 && begin[2] == 1) ||
2531             (begin[0] == 0 && begin[1] == 0 && begin[2] == 0 && begin[3] == 1))
2532         {
2533             mfxU8 numZero = (begin[2] == 1 ? 2 : 3);
2534             mfxU8 type    = (begin[2] == 1 ? begin[3] : begin[4]) & 0x1f;
2535 
2536             for (mfxU8 * next = begin + 4; next < end - 4; ++next)
2537             {
2538                 if (next[0] == 0 && next[1] == 0 && next[2] == 1)
2539                 {
2540                     if (*(next - 1) == 0)
2541                         --next;
2542 
2543                     return NalUnit(begin, next, type, numZero);
2544                 }
2545             }
2546 
2547             return NalUnit(begin, end, type, numZero);
2548         }
2549     }
2550 
2551     return NalUnit();
2552 }
2553 
PrepareSeiMessage(DdiTask const & task,mfxU32 nalHrdBpPresentFlag,mfxU32 vclHrdBpPresentFlag,mfxU32 seqParameterSetId,mfxExtAvcSeiBufferingPeriod & msg)2554 void MfxHwH264Encode::PrepareSeiMessage(
2555     DdiTask const &               task,
2556     mfxU32                        nalHrdBpPresentFlag,
2557     mfxU32                        vclHrdBpPresentFlag,
2558     mfxU32                        seqParameterSetId,
2559     mfxExtAvcSeiBufferingPeriod & msg)
2560 {
2561     Zero(msg);
2562 
2563     assert(seqParameterSetId < 32);
2564     msg.seq_parameter_set_id                    = mfxU8(seqParameterSetId);
2565     msg.nal_cpb_cnt                             = !!nalHrdBpPresentFlag;
2566     msg.vcl_cpb_cnt                             = !!vclHrdBpPresentFlag;
2567     msg.initial_cpb_removal_delay_length        = 24;
2568     msg.nal_initial_cpb_removal_delay[0]        = task.m_initCpbRemoval;
2569     msg.nal_initial_cpb_removal_delay_offset[0] = task.m_initCpbRemovalOffset;
2570     msg.vcl_initial_cpb_removal_delay[0]        = task.m_initCpbRemoval;
2571     msg.vcl_initial_cpb_removal_delay_offset[0] = task.m_initCpbRemovalOffset;
2572 }
2573 
PrepareSeiMessage(DdiTask const & task,mfxU32 fieldId,mfxU32 cpbDpbDelaysPresentFlag,mfxExtAvcSeiPicTiming & msg)2574 void MfxHwH264Encode::PrepareSeiMessage(
2575     DdiTask const &                task,
2576     mfxU32                         fieldId,
2577     mfxU32                         cpbDpbDelaysPresentFlag,
2578     mfxExtAvcSeiPicTiming &        msg)
2579 {
2580     Zero(msg);
2581     msg.cpb_dpb_delays_present_flag = mfxU8(cpbDpbDelaysPresentFlag);
2582     msg.cpb_removal_delay_length    = 24;
2583     msg.dpb_output_delay_length     = 24;
2584     msg.pic_struct_present_flag     = 1;
2585     msg.time_offset_length          = 24;
2586     msg.cpb_removal_delay           = task.m_cpbRemoval[fieldId];
2587     msg.dpb_output_delay            = task.m_dpbOutputDelay;
2588 
2589     switch (task.GetPicStructForDisplay())
2590     {
2591     case mfxU16(MFX_PICSTRUCT_FIELD_TFF):
2592     case mfxU16(MFX_PICSTRUCT_FIELD_BFF):
2593         msg.pic_struct = mfxU8(fieldId + 1);
2594         msg.ct_type    = 1;
2595         break;
2596     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE | MFX_PICSTRUCT_FIELD_TFF):
2597         msg.pic_struct = 3;
2598         msg.ct_type    = 1;
2599         break;
2600     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE | MFX_PICSTRUCT_FIELD_BFF):
2601         msg.pic_struct = 4;
2602         msg.ct_type    = 1;
2603         break;
2604     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE | MFX_PICSTRUCT_FIELD_TFF | MFX_PICSTRUCT_FIELD_REPEATED):
2605         msg.pic_struct = 5;
2606         msg.ct_type    = 1;
2607         break;
2608     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE | MFX_PICSTRUCT_FIELD_BFF | MFX_PICSTRUCT_FIELD_REPEATED):
2609         msg.pic_struct = 6;
2610         msg.ct_type    = 1;
2611         break;
2612     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE | MFX_PICSTRUCT_FRAME_DOUBLING):
2613         msg.pic_struct = 7;
2614         msg.ct_type    = 0;
2615         break;
2616     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE | MFX_PICSTRUCT_FRAME_TRIPLING):
2617         msg.pic_struct = 8;
2618         msg.ct_type    = 0;
2619         break;
2620     case mfxU16(MFX_PICSTRUCT_PROGRESSIVE):
2621     default:
2622         msg.pic_struct = 0;
2623         msg.ct_type    = 0;
2624         break;
2625     }
2626 }
2627 
PrepareSeiMessage(const DdiTask & task,mfxU32 fieldId,mfxU32 frame_mbs_only_flag,mfxExtAvcSeiDecRefPicMrkRep & extSeiDecRefPicMrkRep)2628 void MfxHwH264Encode::PrepareSeiMessage(
2629     const DdiTask& task,
2630     mfxU32 fieldId,
2631     mfxU32 frame_mbs_only_flag,
2632     mfxExtAvcSeiDecRefPicMrkRep& extSeiDecRefPicMrkRep)
2633 {
2634     extSeiDecRefPicMrkRep.original_idr_flag                = task.m_decRefPicMrkRep[fieldId].original_idr_flag;
2635     extSeiDecRefPicMrkRep.original_frame_num               = task.m_decRefPicMrkRep[fieldId].original_frame_num;
2636     extSeiDecRefPicMrkRep.original_field_info_present_flag = (frame_mbs_only_flag == 0);
2637 
2638     if (frame_mbs_only_flag == 0)
2639     {
2640         extSeiDecRefPicMrkRep.original_field_pic_flag    = task.m_decRefPicMrkRep[fieldId].original_field_pic_flag;
2641         extSeiDecRefPicMrkRep.original_bottom_field_flag = task.m_decRefPicMrkRep[fieldId].original_bottom_field_flag;
2642     }
2643 
2644     extSeiDecRefPicMrkRep.no_output_of_prior_pics_flag       = task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.no_output_of_prior_pics_flag;
2645     extSeiDecRefPicMrkRep.long_term_reference_flag           = task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.long_term_reference_flag;
2646     extSeiDecRefPicMrkRep.num_mmco_entries                   = task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.mmco.Size();
2647     extSeiDecRefPicMrkRep.adaptive_ref_pic_marking_mode_flag = task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.mmco.Size() > 0;
2648 
2649     for (mfxU8 i = 0; i < extSeiDecRefPicMrkRep.num_mmco_entries; i ++)
2650     {
2651         extSeiDecRefPicMrkRep.mmco[i]          =  task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.mmco[i];
2652         extSeiDecRefPicMrkRep.value[i * 2]     =  task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.value[i * 2];
2653         extSeiDecRefPicMrkRep.value[i * 2 + 1] =  task.m_decRefPicMrkRep[fieldId].dec_ref_pic_marking.value[i * 2 + 1];
2654     }
2655 
2656 }
2657 
PrepareSeiMessage(MfxVideoParam const & par,mfxExtAvcSeiRecPoint & msg)2658 void MfxHwH264Encode::PrepareSeiMessage(
2659     MfxVideoParam const &   par,
2660     mfxExtAvcSeiRecPoint &  msg)
2661 {
2662     mfxExtCodingOption2 & extOpt2 = GetExtBufferRef(par);
2663     mfxU32 numTL = par.calcParam.numTemporalLayer;
2664     if (extOpt2.IntRefType)
2665         // following calculation assumes that for multiple temporal layers last layer is always non-reference
2666         msg.recovery_frame_cnt = (extOpt2.IntRefCycleSize - 1) << (numTL > 2 ? (numTL >> 1) : 0);
2667     else
2668         msg.recovery_frame_cnt = par.mfx.GopPicSize;
2669     msg.exact_match_flag = 1;
2670     msg.broken_link_flag = 0;
2671     msg.changing_slice_group_idc = 0;
2672 }
2673 
CalculateSeiSize(mfxExtAvcSeiRecPoint const & msg)2674 mfxU32 MfxHwH264Encode::CalculateSeiSize( mfxExtAvcSeiRecPoint const & msg)
2675 {
2676     mfxU32 dataSizeInBits = ExpGolombCodeLength(msg.recovery_frame_cnt); // size of recovery_frame_cnt
2677     dataSizeInBits += 4; // exact_match_flag + broken_link_flag + changing_slice_group_idc
2678     mfxU32 dataSizeInBytes = (dataSizeInBits + 7) >> 3;
2679 
2680     return dataSizeInBytes;
2681 }
2682 
CalculateSeiSize(mfxExtAvcSeiDecRefPicMrkRep const & msg)2683 mfxU32 MfxHwH264Encode::CalculateSeiSize( mfxExtAvcSeiDecRefPicMrkRep const & msg)
2684 {
2685     mfxU32 dataSizeInBits = 0;
2686 
2687     // calculate size of sei_payload
2688     dataSizeInBits += ExpGolombCodeLength(msg.original_frame_num) + 1; // original_frame_num + original_idr_flag
2689 
2690     if (msg.original_field_info_present_flag)
2691         dataSizeInBits += msg.original_field_pic_flag == 0 ? 1 : 2; // original_field_info_present_flag + original_bottom_field_flag
2692 
2693     if (msg.original_idr_flag) {
2694         dataSizeInBits += 2; // no_output_of_prior_pics_flag + long_term_reference_flag
2695     }
2696     else {
2697         dataSizeInBits += 1; // adaptive_ref_pic_marking_mode_flag
2698         for (mfxU32 i = 0; i < msg.num_mmco_entries; i ++) {
2699             dataSizeInBits += ExpGolombCodeLength(msg.mmco[i]); // memory_management_control_operation
2700             dataSizeInBits += ExpGolombCodeLength(msg.value[2 * i]);
2701             if (msg.mmco[i] == 3)
2702                 dataSizeInBits += ExpGolombCodeLength(msg.value[2 * i + 1]);
2703         }
2704     }
2705 
2706     mfxU32 dataSizeInBytes = (dataSizeInBits + 7) >> 3;
2707     return dataSizeInBytes;
2708 }
2709 
2710 // MVC BD {
CalculateSeiSize(mfxExtAvcSeiBufferingPeriod const & msg)2711 mfxU32 MfxHwH264Encode::CalculateSeiSize( mfxExtAvcSeiBufferingPeriod const & msg)
2712 {
2713     mfxU32 dataSizeInBits =
2714         2 * msg.initial_cpb_removal_delay_length * (msg.nal_cpb_cnt + msg.vcl_cpb_cnt);
2715 
2716     dataSizeInBits += ExpGolombCodeLength(msg.seq_parameter_set_id);
2717     mfxU32 dataSizeInBytes = (dataSizeInBits + 7) >> 3;
2718 
2719     return dataSizeInBytes;
2720 }
2721 
CalculateSeiSize(mfxExtPictureTimingSEI const & extPt,mfxExtAvcSeiPicTiming const & msg)2722 mfxU32 MfxHwH264Encode::CalculateSeiSize(
2723     mfxExtPictureTimingSEI const & extPt,
2724     mfxExtAvcSeiPicTiming const &  msg)
2725 {
2726     mfxU32 dataSizeInBits = 0;
2727 
2728     if (msg.cpb_dpb_delays_present_flag)
2729     {
2730         dataSizeInBits += msg.cpb_removal_delay_length;
2731         dataSizeInBits += msg.dpb_output_delay_length;
2732     }
2733 
2734     if (msg.pic_struct_present_flag)
2735     {
2736         dataSizeInBits += 4; // msg.pic_struct;
2737 
2738         assert(msg.pic_struct <= 8);
2739         mfxU32 numClockTS = NUM_CLOCK_TS[std::min<mfxU8>(msg.pic_struct, 8)];
2740 
2741         dataSizeInBits += numClockTS; // clock_timestamp_flag[i]
2742         for (mfxU32 i = 0; i < numClockTS; i++)
2743         {
2744             if (extPt.TimeStamp[i].ClockTimestampFlag)
2745             {
2746                 mfxU32 tsSize = 19;
2747 
2748                 if (extPt.TimeStamp[i].FullTimestampFlag)
2749                 {
2750                     tsSize += 17;
2751                 }
2752                 else
2753                 {
2754                     tsSize += ((
2755                         extPt.TimeStamp[i].HoursFlag * 5 + 7) *
2756                         extPt.TimeStamp[i].MinutesFlag + 7) *
2757                         extPt.TimeStamp[i].SecondsFlag + 1;
2758                 }
2759 
2760                 dataSizeInBits += tsSize + msg.time_offset_length;
2761             }
2762         }
2763     }
2764 
2765     mfxU32 dataSizeInBytes = (dataSizeInBits + 7) >> 3;
2766 
2767     return dataSizeInBytes;
2768 }
2769 // MVC BD }
2770 
2771 
2772 namespace
2773 {
ConvertFrameTypeMfx2Umc(mfxU32 frameType)2774     UMC::FrameType ConvertFrameTypeMfx2Umc(mfxU32 frameType)
2775     {
2776         switch (frameType & 0xf)
2777         {
2778         case MFX_FRAMETYPE_I: return UMC::I_PICTURE;
2779         case MFX_FRAMETYPE_P: return UMC::P_PICTURE;
2780         case MFX_FRAMETYPE_B: return UMC::B_PICTURE;
2781         default: assert(!"wrong coding type"); return UMC::NONE_PICTURE;
2782         }
2783     }
2784 
ConvertPicStructMfx2Umc(mfxU32 picStruct)2785     mfxI32 ConvertPicStructMfx2Umc(mfxU32 picStruct)
2786     {
2787         switch (picStruct)
2788         {
2789         case MFX_PICSTRUCT_PROGRESSIVE: return UMC::PS_FRAME;
2790         case MFX_PICSTRUCT_FIELD_TFF:   return UMC::PS_TOP_FIELD;
2791         case MFX_PICSTRUCT_FIELD_BFF:   return UMC::PS_BOTTOM_FIELD;
2792         default: assert(!"bad picStruct"); return UMC::PS_FRAME;
2793         }
2794     }
2795 };
2796 
Init(MfxVideoParam & video)2797 mfxStatus UmcBrc::Init(MfxVideoParam  & video)
2798 {
2799     assert(
2800         video.mfx.RateControlMethod == MFX_RATECONTROL_CBR ||
2801         video.mfx.RateControlMethod == MFX_RATECONTROL_VBR ||
2802         video.mfx.RateControlMethod == MFX_RATECONTROL_AVBR);
2803 
2804     mfxExtCodingOption2 const & extOpt2 = GetExtBufferRef(video);
2805     m_lookAhead = extOpt2.LookAheadDepth;
2806 
2807     mfxVideoParam tmpVideo = video;
2808     tmpVideo.mfx.GopRefDist = (extOpt2.LookAheadDepth >= 5) ? 1 : tmpVideo.mfx.GopRefDist;
2809 
2810     UMC::VideoBrcParams umcBrcParams;
2811     mfxStatus sts = ConvertVideoParam_Brc(&tmpVideo, &umcBrcParams);
2812     assert(sts == MFX_ERR_NONE);
2813     (void)sts;
2814 
2815     umcBrcParams.GOPPicSize = tmpVideo.mfx.GopPicSize;
2816     umcBrcParams.GOPRefDist = tmpVideo.mfx.GopRefDist;
2817     umcBrcParams.profile    = tmpVideo.mfx.CodecProfile;
2818     umcBrcParams.level      = tmpVideo.mfx.CodecLevel;
2819 
2820     UMC::Status umcSts = m_impl.Init(&umcBrcParams);
2821     assert(umcSts == UMC::UMC_OK);
2822     (void)umcSts;
2823 
2824     return MFX_ERR_NONE;
2825 }
2826 
Close()2827 void UmcBrc::Close()
2828 {
2829     m_impl.Close();
2830 }
2831 
GetQp(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)2832 void UmcBrc::GetQp(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
2833 {
2834     mfxU32 frameType = par.FrameType;
2835     if (m_lookAhead >= 5 && (frameType & MFX_FRAMETYPE_B))
2836         frameType = MFX_FRAMETYPE_P | MFX_FRAMETYPE_REF;
2837     UMC::FrameType umcFrameType = ConvertFrameTypeMfx2Umc(frameType);
2838     m_impl.SetPictureFlags(umcFrameType, ConvertPicStructMfx2Umc(par.picStruct));
2839 
2840     frameCtrl.QpY = (mfxU8)m_impl.GetQP(umcFrameType);
2841 }
2842 
GetQpForRecode(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)2843 void UmcBrc::GetQpForRecode(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
2844 {
2845     frameCtrl.QpY = mfx::clamp(frameCtrl.QpY + (mfxU8)par.NumRecode, 1, 51);
2846 }
2847 
GetFractionalQp(const BRCFrameParams & par)2848 mfxF32 UmcBrc::GetFractionalQp(const BRCFrameParams& par)
2849 {
2850     mfxU32 frameType = par.FrameType;
2851     if (m_lookAhead >= 5 && (frameType & MFX_FRAMETYPE_B))
2852         frameType = MFX_FRAMETYPE_P | MFX_FRAMETYPE_REF;
2853     UMC::FrameType umcFrameType = ConvertFrameTypeMfx2Umc(frameType);
2854     m_impl.SetPictureFlags(umcFrameType, ConvertPicStructMfx2Umc(par.picStruct));
2855     return 0.f;//m_impl.GetFractionalQP(umcFrameType);
2856 }
2857 
SetQp(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)2858 void UmcBrc::SetQp(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
2859 {
2860     mfxU32 frameType = par.FrameType;
2861     if (m_lookAhead >= 5 && (frameType & MFX_FRAMETYPE_B))
2862         frameType = MFX_FRAMETYPE_P | MFX_FRAMETYPE_REF;
2863     m_impl.SetQP(frameCtrl.QpY, ConvertFrameTypeMfx2Umc(frameType));
2864 }
2865 
PreEnc(const BRCFrameParams & par,std::vector<VmeData * > const & vmeData)2866 void UmcBrc::PreEnc(const BRCFrameParams& par, std::vector<VmeData *> const & vmeData)
2867 {
2868     for (size_t i = 0; i < vmeData.size(); i++)
2869     {
2870         if (vmeData[i]->encOrder == par.EncodedOrder)
2871         {
2872             m_impl.PreEncFrame(ConvertFrameTypeMfx2Umc(par.FrameType), vmeData[i]->intraCost, vmeData[i]->interCost);
2873             break;
2874         }
2875     }
2876 }
2877 
Report(const BRCFrameParams & par,mfxU32 userDataLength,mfxU32,mfxBRCFrameCtrl &)2878 mfxU32 UmcBrc::Report(const BRCFrameParams& par,  mfxU32 userDataLength, mfxU32 /*maxFrameSize*/, mfxBRCFrameCtrl &/*frameCtrl*/)
2879 {
2880     return m_impl.PostPackFrame(ConvertFrameTypeMfx2Umc(par.FrameType), 8 * par.CodedFrameSize, userDataLength * 8, par.NumRecode, par.EncodedOrder);
2881 }
2882 
GetMinFrameSize()2883 mfxU32 UmcBrc::GetMinFrameSize()
2884 {
2885     mfxI32 minSize = 0;
2886     //m_impl.GetMinMaxFrameSize(&minSize, 0);
2887     assert(minSize >= 0);
2888     return mfxU32(minSize + 7) / 8;
2889 }
2890 
2891 #ifdef _DEBUG
2892 //#define brcprintf printf
2893 #else // _DEBUG
2894 //#define brcprintf
2895 #endif // _DEBUG
2896 
2897 #ifndef brcprintf
2898 #define brcprintf(...)
2899 #endif // brcprintf
2900 
2901 namespace MfxHwH264EncodeHW
2902 {
2903     mfxF64 const INTRA_QSTEP_COEFF  = 2.0;
2904     mfxI32 const MAX_QP_CHANGE      = 2;
2905     mfxF64 const LOG2_64            = 3.0;
2906     mfxF64 const MIN_EST_RATE       = 0.3;
2907     mfxF64 const NORM_EST_RATE      = 100.0;
2908 
2909     mfxF64 const MIN_RATE_COEFF_CHANGE = 0.5;
2910     mfxF64 const MAX_RATE_COEFF_CHANGE = 2.0;
2911     mfxF64 const INIT_RATE_COEFF[] = {
2912         1.109, 1.196, 1.225, 1.309, 1.369, 1.428, 1.490, 1.588, 1.627, 1.723, 1.800, 1.851, 1.916,
2913         2.043, 2.052, 2.140, 2.097, 2.096, 2.134, 2.221, 2.084, 2.153, 2.117, 2.014, 1.984, 2.006,
2914         1.801, 1.796, 1.682, 1.549, 1.485, 1.439, 1.248, 1.221, 1.133, 1.045, 0.990, 0.987, 0.895,
2915         0.921, 0.891, 0.887, 0.896, 0.925, 0.917, 0.942, 0.964, 0.997, 1.035, 1.098, 1.170, 1.275
2916     };
2917 
GetSkippedQp(MbData const & mb)2918     mfxU8 GetSkippedQp(MbData const & mb)
2919     {
2920         if (mb.intraMbFlag)
2921             return 52; // never skipped
2922         if (abs(mb.mv[0].x - mb.costCenter0.x) >= 4 ||
2923             abs(mb.mv[0].y - mb.costCenter0.y) >= 4 ||
2924             abs(mb.mv[1].x - mb.costCenter1.x) >= 4 ||
2925             abs(mb.mv[1].y - mb.costCenter1.y) >= 4)
2926             return 52; // never skipped
2927 
2928         mfxU16 const * sumc = mb.lumaCoeffSum;
2929         mfxU8  const * nzc  = mb.lumaCoeffCnt;
2930 
2931         if (nzc[0] + nzc[1] + nzc[2] + nzc[3] == 0)
2932             return 0; // skipped at any qp
2933 
2934         mfxF64 qoff = 1.0 / 6;
2935         mfxF64 norm = 0.1666;
2936 
2937         mfxF64 qskip = std::max({
2938             nzc[0] ? (sumc[0] * norm / nzc[0]) / (1.0 - qoff) * LOG2_64 : 0.0,
2939             nzc[1] ? (sumc[1] * norm / nzc[1]) / (1.0 - qoff) * LOG2_64 : 0.0,
2940             nzc[2] ? (sumc[2] * norm / nzc[2]) / (1.0 - qoff) * LOG2_64 : 0.0,
2941             nzc[3] ? (sumc[3] * norm / nzc[3]) / (1.0 - qoff) * LOG2_64 : 0.0});
2942 
2943         return QStep2QpCeil(qskip);
2944     }
2945 }
2946 using namespace MfxHwH264EncodeHW;
SetMinMaxQP(mfxExtCodingOption2 const & extOpt2,mfxU8 QPMin[],mfxU8 QPMax[])2947 inline void SetMinMaxQP(mfxExtCodingOption2 const &  extOpt2, mfxU8  QPMin[], mfxU8  QPMax[])
2948 {
2949     // valid qp range [1, 51], default qp range [8, 51], qp=0 doesn't supported by driver
2950     QPMin[0] = (extOpt2.MinQPI) ? extOpt2.MinQPI : 8;
2951     QPMin[1] = (extOpt2.MinQPP) ? extOpt2.MinQPP : 8;
2952     QPMin[2] = (extOpt2.MinQPB) ? extOpt2.MinQPB : 8;
2953 
2954     QPMax[0] = (extOpt2.MaxQPI) ? extOpt2.MaxQPI : 51;
2955     QPMax[1] = (extOpt2.MaxQPP) ? extOpt2.MaxQPP : 51;
2956     QPMax[2] = (extOpt2.MaxQPB) ? extOpt2.MaxQPB : 51;
2957 }
2958 
// Accuracy levels accepted by setLAThresholds().
// Values are implicit: UNKNOWN = 0, LOW = 1, MEDIUM = 2, HIGH = 3.
enum
{
    MFX_BRC_ACCURACY_UNKNOWN, // not specified; setLAThresholds() applies the MEDIUM settings
    MFX_BRC_ACCURACY_LOW,
    MFX_BRC_ACCURACY_MEDIUM,
    MFX_BRC_ACCURACY_HIGH
};
2966 
2967 
// Rate control target settings: the rate calculation window and the accuracy level.
struct sBrcTarget
{
    mfxU32 rateCalcPeriodInSec; //  0 - all frames are used in rate calculation, otherwise the last n seconds
    mfxU32 accuracy;            //  MFX_BRC_ACCURACY_LOW, MFX_BRC_ACCURACY_MEDIUM, MFX_BRC_ACCURACY_HIGH
};
2973 
2974 
2975 
setLAThresholds(sLAThresholds & thresholds,mfxU32 accuracy)2976 void setLAThresholds(sLAThresholds& thresholds, mfxU32 accuracy)
2977 {
2978     thresholds.minFramesForClassicLA = 30;
2979     thresholds.minFramesForStat = 10;
2980 
2981     if (accuracy == MFX_BRC_ACCURACY_HIGH)
2982     {
2983         thresholds.minCostCalcPeriod = 20;
2984         thresholds.maxRateRatioLocal = 1.3;
2985         thresholds.minRateRatioLocal = 0.9;
2986         thresholds.maxAvgRateRatio = 1.02;
2987         thresholds.minAvgRateRatio = 1.00;
2988     }
2989     else if (accuracy == MFX_BRC_ACCURACY_LOW)
2990     {
2991         thresholds.minCostCalcPeriod = 40;
2992         thresholds.maxRateRatioLocal = 1.5;
2993         thresholds.minRateRatioLocal = 0.7;
2994         thresholds.maxAvgRateRatio = 1.08;
2995         thresholds.minAvgRateRatio = 1.00;
2996     }
2997     else
2998     {
2999         // MFX_BRC_ACCURACY_MEDIUM
3000         thresholds.minCostCalcPeriod = 30;
3001         thresholds.maxRateRatioLocal = 1.4;
3002         thresholds.minRateRatioLocal = 0.85;
3003         thresholds.maxAvgRateRatio = 1.05;
3004         thresholds.minAvgRateRatio = 1.00;
3005     }
3006 }
3007 
criticalRatio(mfxF64 maxRatio)3008 inline mfxF64 criticalRatio(mfxF64 maxRatio)
3009 {
3010     // limit overrun more than twice is critical
3011     // example: for maxRatio = 1.4 (40% limit), 80% excess is critical
3012     return (maxRatio > 1.0) ? (2.0*maxRatio - 1.0) : maxRatio;
3013 }
3014 
// Initializes the look-ahead rate control from the encoder parameters:
// look-ahead depth and dependency, target rate per macroblock, rate
// coefficient history, QP limits, optional sliding-window bitrate limiter,
// optional HRD model (LA_HRD only) and per-frame-type maximum frame sizes.
// Always returns MFX_ERR_NONE.
mfxStatus LookAheadBrc2::Init(MfxVideoParam  & video)
{
    mfxExtCodingOptionDDI const & extDdi  = GetExtBufferRef(video);
    mfxExtCodingOption2   const & extOpt2 = GetExtBufferRef(video);
    mfxExtCodingOption3   const & extOpt3 = GetExtBufferRef(video);

    // target is fixed here: no rate calculation window, medium accuracy
    sBrcTarget brcTarget = {0, MFX_BRC_ACCURACY_MEDIUM};
    setLAThresholds(m_thresholds, brcTarget.accuracy);

    // NOTE(review): no check here that LookAheadDependency <= LookAheadDepth - presumably validated earlier; confirm
    m_lookAhead     = extOpt2.LookAheadDepth - extDdi.LookAheadDependency;
    m_lookAheadDep  = extDdi.LookAheadDependency;
    m_LaScaleFactor = LaDSenumToFactor(extOpt2.LookAheadDS);
    m_qpUpdateRange = extDdi.QpUpdateRange;
    m_strength      = extDdi.StrengthN;

    // NOTE(review): FrameRateExtD is not checked for zero here - presumably validated earlier; confirm
    m_fr = mfxF64(video.mfx.FrameInfo.FrameRateExtN) / video.mfx.FrameInfo.FrameRateExtD;
    // number of 16x16 macroblocks in a frame
    m_totNumMb = video.mfx.FrameInfo.Width * video.mfx.FrameInfo.Height / 256;
    // initial target rate per frame per macroblock (1000 * kbps / fps / macroblocks)
    m_initTargetRate     = 1000* video.calcParam.targetKbps /m_fr / m_totNumMb;

    m_currRate  = m_initTargetRate;

    m_laData.reserve(m_lookAhead+1);

    // one rate coefficient history per qp value, seeded from INIT_RATE_COEFF
    assert(extDdi.RegressionWindow <= m_rateCoeffHistory[0].MAX_WINDOW);
    for (mfxU32 qp = 0; qp < 52; qp++)
        m_rateCoeffHistory[qp].Reset(extDdi.RegressionWindow, 100.0, 100.0 * INIT_RATE_COEFF[qp]);
    m_framesBehind = 0;
    m_bitsBehind = 0.0;
    m_curQp = -1;
    m_curBaseQp = -1;
    //m_coef = 4;
    m_skipped = 0;

    m_maxFrameSizeForRec = 0;

    // NOTE(review): the previous m_AvgBitrate value is overwritten without delete -
    // presumably Close() is always called before a repeated Init(); confirm
    m_AvgBitrate = 0;

    SetMinMaxQP(extOpt2, m_QPMin, m_QPMax);

    // sliding-window bitrate limiter is created only when WinBRCSize is set;
    // it is released in Close()
    if (extOpt3.WinBRCSize)
    {
        m_AvgBitrate = new AVGBitrate(extOpt3.WinBRCSize, (mfxU32)(1000.0* video.calcParam.WinBRCMaxAvgKbps / m_fr), (mfxU32)(1000.0* video.calcParam.targetKbps / m_fr), true);
    }
    // rate calculation period in frames; 0xffffffff when neither an explicit
    // period nor LA_HRD applies (brcTarget.rateCalcPeriodInSec is 0 above,
    // so the first branch is not taken)
    if (brcTarget.rateCalcPeriodInSec != 0)
        m_rateCalcPeriod = (brcTarget.rateCalcPeriodInSec < 0x1fffffff) ? (mfxU32)(brcTarget.rateCalcPeriodInSec*m_fr) : 0xffffffff;
    else  if (video.mfx.RateControlMethod == MFX_RATECONTROL_LA_HRD)
        m_rateCalcPeriod = (mfxU32)(8000.0 * video.mfx.BufferSizeInKB / video.mfx.TargetKbps*m_fr * 2.0);
    else
        m_rateCalcPeriod = 0xffffffff;

    // HRD model exists only for LA_HRD; any previous model is dropped first
    m_hrd.reset();
    if (video.mfx.RateControlMethod == MFX_RATECONTROL_LA_HRD)
    {
        m_hrd.reset(new Hrd);
        m_hrd->Setup(video);
    }

    // maximum frame sizes: [0] uses the larger of MaxFrameSizeI and MaxFrameSize,
    // [1] the larger of MaxFrameSizeP and MaxFrameSize, [2] MaxFrameSize
    m_MaxframeSize[0] = std::max(extOpt3.MaxFrameSizeI, extOpt2.MaxFrameSize);
    m_MaxframeSize[1] = std::max(extOpt3.MaxFrameSizeP, extOpt2.MaxFrameSize);
    m_MaxframeSize[2] = extOpt2.MaxFrameSize;


    // cost calculation period is never shorter than the look-ahead depth
    m_costCalcPeriod = std::max<mfxU32>(m_lookAhead, m_thresholds.minCostCalcPeriod);

    m_laDataStat.reserve(m_costCalcPeriod - m_lookAhead + 1);
    m_AsyncDepth = video.AsyncDepth > 1 ? 1 : 0;
    m_first = 0;

    return MFX_ERR_NONE;

}
Close()3086 void LookAheadBrc2::Close()
3087 {
3088 
3089     if (m_AvgBitrate)
3090     {
3091        delete m_AvgBitrate;
3092        m_AvgBitrate = 0;
3093     }
3094 }
3095 
3096 
// Initializes the VME-based rate control from the encoder parameters:
// target rate per macroblock, rate coefficient history, QP limits and the
// optional sliding-window bitrate limiter. The look-ahead depth is reset to 0.
// Always returns MFX_ERR_NONE.
mfxStatus VMEBrc::Init(MfxVideoParam  & video)
{
    mfxExtCodingOptionDDI const & extDdi  = GetExtBufferRef(video);
    mfxExtCodingOption2   const & extOpt2 = GetExtBufferRef(video);
    mfxExtCodingOption3   const & extOpt3 = GetExtBufferRef(video);


    m_LaScaleFactor = LaDSenumToFactor(extOpt2.LookAheadDS);
    m_qpUpdateRange = extDdi.QpUpdateRange;
    m_strength      = extDdi.StrengthN;

    // NOTE(review): FrameRateExtD is not checked for zero here - presumably validated earlier; confirm
    m_fr = mfxF64(video.mfx.FrameInfo.FrameRateExtN) / video.mfx.FrameInfo.FrameRateExtD;

    // number of 16x16 macroblocks in a frame
    m_totNumMb = video.mfx.FrameInfo.Width * video.mfx.FrameInfo.Height / 256;
    // initial target rate per frame per macroblock (1000 * kbps / fps / macroblocks);
    // min and max targets start from the same value
    m_initTargetRate = 1000* video.calcParam.targetKbps / m_fr / m_totNumMb;
    m_targetRateMin = m_initTargetRate;
    m_targetRateMax = m_initTargetRate;
    m_laData.resize(0);

    // one rate coefficient history per qp value, seeded from INIT_RATE_COEFF
    assert(extDdi.RegressionWindow <= m_rateCoeffHistory[0].MAX_WINDOW);
    for (mfxU32 qp = 0; qp < 52; qp++)
        m_rateCoeffHistory[qp].Reset(extDdi.RegressionWindow, 100.0, 100.0 * INIT_RATE_COEFF[qp]);
    m_framesBehind = 0;
    m_bitsBehind = 0.0;
    m_curQp = -1;
    m_curBaseQp = -1;
    m_skipped = 0;
    m_lookAhead = 0;
    SetMinMaxQP(extOpt2, m_QPMin, m_QPMax);

    // NOTE(review): the previous m_AvgBitrate value is overwritten without delete -
    // presumably Close() is always called before a repeated Init(); confirm
    m_AvgBitrate = 0;
    // sliding-window bitrate limiter is created only when WinBRCSize is set;
    // it is released in Close()
    if (extOpt3.WinBRCSize)
    {
        m_AvgBitrate = new AVGBitrate(extOpt3.WinBRCSize, (mfxU32)(1000.0 * video.calcParam.WinBRCMaxAvgKbps/m_fr),(mfxU32)(1000.0* video.calcParam.targetKbps / m_fr),true);
    }
    return MFX_ERR_NONE;
}
Close()3134 void VMEBrc::Close()
3135 {
3136 
3137     if (m_AvgBitrate)
3138     {
3139        delete m_AvgBitrate;
3140        m_AvgBitrate = 0;
3141     }
3142 }
3143 
SetFrameVMEData(const mfxExtLAFrameStatistics * pLaOut,mfxU32 width,mfxU32 height)3144 mfxStatus VMEBrc::SetFrameVMEData(const mfxExtLAFrameStatistics *pLaOut, mfxU32 width, mfxU32 height)
3145 {
3146     mfxU32 resNum = 0;
3147     mfxU32 numLaFrames = pLaOut->NumFrame;
3148     mfxU32 k = height*width >> 7;
3149     while(resNum < pLaOut->NumStream)
3150     {
3151         if (pLaOut->FrameStat[resNum*numLaFrames].Height == height &&
3152             pLaOut->FrameStat[resNum*numLaFrames].Width  == width)
3153             break;
3154         resNum ++;
3155     }
3156     MFX_CHECK(resNum <  pLaOut->NumStream, MFX_ERR_UNDEFINED_BEHAVIOR);
3157     mfxLAFrameInfo * pFrameData = pLaOut->FrameStat + numLaFrames*resNum;
3158 
3159 
3160     if (m_lookAhead == 0)
3161         m_lookAhead = numLaFrames;
3162 
3163     std::list<LaFrameData>::iterator it = m_laData.begin();
3164     while (m_laData.size()>0)
3165     {
3166         it = m_laData.begin();
3167         if (!((*it).bNotUsed))
3168             break;
3169         m_laData.pop_front();
3170     }
3171 
3172     // some frames can be stored already
3173     // start of stored sequence
3174     it = m_laData.begin();
3175      while (it != m_laData.end())
3176     {
3177         if ((*it).encOrder == pFrameData[0].FrameEncodeOrder)
3178             break;
3179         ++it;
3180     }
3181     mfxU32 ind  = 0;
3182 
3183     // check stored sequence
3184     while ((it != m_laData.end()) && (ind < numLaFrames))
3185     {
3186         MFX_CHECK((*it).encOrder == pFrameData[ind].FrameEncodeOrder, MFX_ERR_UNDEFINED_BEHAVIOR);
3187         ++ind;
3188         ++it;
3189     }
3190     MFX_CHECK(it == m_laData.end(), MFX_ERR_UNDEFINED_BEHAVIOR);
3191 
3192     // store a new data
3193     for (; ind < numLaFrames; ind++)
3194     {
3195         LaFrameData data = {};
3196 
3197         data.encOrder  = pFrameData[ind].FrameEncodeOrder;
3198         data.dispOrder = pFrameData[ind].FrameDisplayOrder;
3199         data.interCost = pFrameData[ind].InterCost;
3200         data.intraCost = pFrameData[ind].IntraCost;
3201         data.propCost  = pFrameData[ind].DependencyCost;
3202         data.bframe    = (pFrameData[ind].FrameType & MFX_FRAMETYPE_B) != 0;
3203 
3204 
3205         MFX_CHECK(data.intraCost, MFX_ERR_UNDEFINED_BEHAVIOR);
3206 
3207         for (mfxU32 qp = 0; qp < 52; qp++)
3208         {
3209             data.estRate[qp] = ((mfxF64)pFrameData[ind].EstimatedRate[qp])/(MFX_QSTEP[qp]*k);
3210         }
3211         m_laData.push_back(data);
3212     }
3213 
3214     return MFX_ERR_NONE;
3215 }
3216 
SelectQp(mfxF64 erate[52],mfxF64 budget)3217 mfxU8 SelectQp(mfxF64 erate[52], mfxF64 budget)
3218 {
3219     for (mfxU8 qp = 1; qp < 52; qp++)
3220         if (erate[qp] < budget)
3221             return (erate[qp - 1] + erate[qp] < 2 * budget) ? qp - 1 : qp;
3222     return 51;
3223 }
3224 
GetTotalRate(std::vector<LookAheadBrc2::LaFrameData> const & laData,mfxI32 baseQp,size_t size,mfxU32 first)3225 mfxF64 GetTotalRate(std::vector<LookAheadBrc2::LaFrameData> const & laData, mfxI32 baseQp, size_t size, mfxU32 first)
3226 {
3227     mfxF64 totalRate = 0.0;
3228     size = (size < laData.size()) ? size : laData.size();
3229     for (size_t i = 0 + first; i < size; i++)
3230         totalRate += laData[i].estRateTotal[mfx::clamp(baseQp + laData[i].deltaQp, 0, 51)];
3231     return totalRate;
3232 }
3233 
GetTotalRate(std::vector<LookAheadBrc2::LaFrameData> const & laData,std::vector<LookAheadBrc2::LaFrameData> const & laDataStat,mfxI32 baseQp,size_t size,mfxU32 first)3234 mfxF64 GetTotalRate(std::vector<LookAheadBrc2::LaFrameData> const & laData,
3235                     std::vector<LookAheadBrc2::LaFrameData> const & laDataStat,
3236                     mfxI32 baseQp, size_t size, mfxU32 first)
3237 {
3238     mfxF64 totalRate = GetTotalRate(laData, baseQp, size, first);
3239     //old frames from this period
3240     totalRate += GetTotalRate(laDataStat, baseQp, laDataStat.size(), 0);
3241 
3242     return totalRate;
3243 }
3244 
GetTotalRate(std::list<VMEBrc::LaFrameData>::iterator start,std::list<VMEBrc::LaFrameData>::iterator end,mfxI32 baseQp)3245 mfxF64 GetTotalRate(std::list<VMEBrc::LaFrameData>::iterator start, std::list<VMEBrc::LaFrameData>::iterator end, mfxI32 baseQp)
3246 {
3247     mfxF64 totalRate = 0.0;
3248     std::list<VMEBrc::LaFrameData>::iterator it = start;
3249     for (; it!=end; ++it)
3250     {
3251         totalRate += (*it).estRateTotal[mfx::clamp(baseQp + (*it).deltaQp, 0, 51)];
3252     }
3253     return totalRate;
3254 }
3255 
GetTotalRate(std::list<VMEBrc::LaFrameData>::iterator start,std::list<VMEBrc::LaFrameData>::iterator end,mfxI32 baseQp,size_t size)3256 mfxF64 GetTotalRate(std::list<VMEBrc::LaFrameData>::iterator start, std::list<VMEBrc::LaFrameData>::iterator end, mfxI32 baseQp, size_t size)
3257 {
3258     mfxF64 totalRate = 0.0;
3259     size_t num = 0;
3260 
3261     std::list<VMEBrc::LaFrameData>::iterator it = start;
3262     for (; it!=end; ++it)
3263     {
3264         if ((num ++) >= size)
3265             break;
3266         totalRate += (*it).estRateTotal[mfx::clamp(baseQp + (*it).deltaQp, 0, 51)];
3267     }
3268     return totalRate;
3269 }
3270 
SelectQp(std::vector<LookAheadBrc2::LaFrameData> const & laData,mfxF64 budget,size_t size,mfxU32 async)3271 mfxU8 SelectQp(std::vector<LookAheadBrc2::LaFrameData> const & laData, mfxF64 budget, size_t size, mfxU32 async)
3272 {
3273     mfxF64 prevTotalRate = GetTotalRate(laData, 0, size, async);
3274     //printf("SelectQp: budget = %f, size = %d, async = %d\n", budget, size, async);
3275     for (mfxU8 qp = 1; qp < 52; qp++)
3276     {
3277         mfxF64 totalRate = GetTotalRate(laData, qp, size, async);
3278         if (totalRate < budget)
3279             return (prevTotalRate + totalRate < 2 * budget) ? qp - 1 : qp;
3280         else
3281             prevTotalRate = totalRate;
3282     }
3283     return 51;
3284 }
3285 
SelectQp(std::vector<LookAheadBrc2::LaFrameData> const & laData,std::vector<LookAheadBrc2::LaFrameData> const & laDataStat,mfxF64 budget,size_t size,mfxU32 async)3286 mfxU8 SelectQp(std::vector<LookAheadBrc2::LaFrameData> const & laData,
3287                std::vector<LookAheadBrc2::LaFrameData> const & laDataStat,
3288                 mfxF64 budget, size_t size, mfxU32 async)
3289 {
3290     mfxF64 prevTotalRate = GetTotalRate(laData, laDataStat, 0, size, async);
3291     //printf("SelectQp: budget = %f, size = %d, async = %d\n", budget, size, async);
3292     for (mfxU8 qp = 1; qp < 52; qp++)
3293     {
3294         mfxF64 totalRate = GetTotalRate(laData, laDataStat, qp, size, async);
3295         if (totalRate < budget)
3296             return (prevTotalRate + totalRate < 2 * budget) ? qp - 1 : qp;
3297         prevTotalRate = totalRate;
3298     }
3299     return 51;
3300 }
3301 
SelectQp(std::list<VMEBrc::LaFrameData>::iterator start,std::list<VMEBrc::LaFrameData>::iterator end,mfxF64 budget,size_t size)3302 mfxU8 SelectQp(std::list<VMEBrc::LaFrameData>::iterator start, std::list<VMEBrc::LaFrameData>::iterator end, mfxF64 budget, size_t size)
3303 {
3304     mfxF64 prevTotalRate = GetTotalRate(start, end, 0, size);
3305     //printf("SelectQp: budget = %f, size = %d, async = %d\n", budget, size, async);
3306     for (mfxU8 qp = 1; qp < 52; qp++)
3307     {
3308         mfxF64 totalRate = GetTotalRate(start, end, qp, size);
3309         if (totalRate < budget)
3310             return (prevTotalRate + totalRate < 2 * budget) ? qp - 1 : qp;
3311         else
3312             prevTotalRate = totalRate;
3313     }
3314     return 51;
3315 }
3316 
SelectQp(std::list<VMEBrc::LaFrameData>::iterator start,std::list<VMEBrc::LaFrameData>::iterator end,mfxF64 budget)3317 mfxU8 SelectQp(std::list<VMEBrc::LaFrameData>::iterator start, std::list<VMEBrc::LaFrameData>::iterator end, mfxF64 budget)
3318 {
3319     mfxF64 prevTotalRate = GetTotalRate(start,end, 0);
3320     for (mfxU8 qp = 1; qp < 52; qp++)
3321     {
3322         mfxF64 totalRate = GetTotalRate(start,end, qp);
3323         if (totalRate < budget)
3324             return (prevTotalRate + totalRate < 2 * budget) ? qp - 1 : qp;
3325         else
3326             prevTotalRate = totalRate;
3327     }
3328     return 51;
3329 }
3330 
GetFrameTypeLetter(mfxU32 frameType)3331 mfxU8 GetFrameTypeLetter(mfxU32 frameType)
3332 {
3333     mfxU32 ref = (frameType & MFX_FRAMETYPE_REF) ? 0 : 'a' - 'A';
3334     if (frameType & MFX_FRAMETYPE_I)
3335         return mfxU8('I' + ref);
3336     if (frameType & MFX_FRAMETYPE_P)
3337         return mfxU8('P' + ref);
3338     if (frameType & MFX_FRAMETYPE_B)
3339         return mfxU8('B' + ref);
3340     return 'x';
3341 }
GetFrameTypeIndex(mfxU32 frameType)3342 inline mfxU32 GetFrameTypeIndex(mfxU32 frameType)
3343 {
3344     if (frameType & MFX_FRAMETYPE_I)
3345         return 0;
3346     if (frameType & MFX_FRAMETYPE_P)
3347         return 1;
3348     if (frameType & MFX_FRAMETYPE_B)
3349         return 2;
3350     return 0;
3351 }
3352 
GetQp(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)3353 void LookAheadBrc2::GetQp(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
3354 {
3355     (void)par;
3356     MFX_AUTO_LTRACE(MFX_TRACE_LEVEL_INTERNAL, "LookAheadBrc2::GetQp");
3357     brcprintf("\r%4d: do=%4d type=%c Rt=%7.3f-%7.3f curc=%4d numc=%2d ", m_laData[0].encOrder, m_laData[0].poc/2,
3358         GetFrameTypeLetter(par.FrameType), m_targetRateMin, m_targetRateMax, m_laData[0].interCost / m_totNumMb, mfxU32(m_laData.size()));
3359 
3360 
3361     mfxF64 totalEstRate[52] = { 0.0 };
3362 
3363     for (mfxU32 qp = 0; qp < 52; qp++)
3364     {
3365         mfxF64 rateCoeff = m_rateCoeffHistory[qp].GetCoeff();
3366         for (mfxU32 i = m_first; i < m_laData.size(); i++)
3367         {
3368             m_laData[i].estRateTotal[qp] = std::max(MIN_EST_RATE, rateCoeff * m_laData[i].estRate[qp]);
3369             totalEstRate[qp] += m_laData[i].estRateTotal[qp];
3370         }
3371         for (size_t i = 0; i < m_laDataStat.size(); i++)
3372         {
3373             m_laDataStat[i].estRateTotal[qp] = std::max(MIN_EST_RATE, rateCoeff * m_laDataStat[i].estRate[qp]);
3374         }
3375     }
3376 
3377     mfxI32 maxDeltaQp = INT_MIN;
3378     if (m_lookAheadDep > 0)
3379     {
3380         mfxI32 curQp = m_curBaseQp < 0 ? SelectQp(totalEstRate, m_initTargetRate * m_laData.size()) : m_curBaseQp;
3381         mfxF64 strength = 0.03 * curQp + .75;
3382 
3383         for (mfxU32 i = m_first; i < m_laData.size(); i++)
3384         {
3385             mfxU32 intraCost    = m_laData[i].intraCost;
3386             mfxU32 interCost    = m_laData[i].interCost;
3387             mfxU32 propCost     = m_laData[i].propCost;
3388             mfxF64 ratio        = 1.0;//mfxF64(interCost) / intraCost;
3389             mfxF64 deltaQp      = log((intraCost + propCost * ratio) / intraCost) / log(2.0);
3390             m_laData[i].deltaQp = (interCost >= intraCost * 0.9)
3391                 ? -mfxI32(deltaQp * 2 * strength + 0.5)
3392                 : -mfxI32(deltaQp * 1 * strength + 0.5);
3393             maxDeltaQp = std::max(maxDeltaQp, m_laData[i].deltaQp);
3394         }
3395     }
3396     else
3397     {
3398         for (mfxU32 i = m_first; i < m_laData.size(); i++)
3399         {
3400             mfxU32 intraCost    = m_laData[i].intraCost;
3401             mfxU32 interCost    = m_laData[i].interCost;
3402             m_laData[i].deltaQp = (interCost >= intraCost * 0.9) ? -5 : m_laData[i].bframe ? 0 : -2;
3403             maxDeltaQp = std::max(maxDeltaQp, m_laData[i].deltaQp);
3404         }
3405     }
3406 
3407     for (mfxU32 i = m_first; i < m_laData.size(); i++)
3408         m_laData[i].deltaQp -= maxDeltaQp;
3409 
3410     mfxF64  MaxRate = 0;
3411     mfxF64  MinRate = 0;
3412     bool   bStartOfStream = (par.EncodedOrder < m_thresholds.minFramesForStat);
3413     mfxF64  currRateLocal = m_initTargetRate;
3414     bool   bLocalMin = false;
3415     if (m_lookAhead < m_thresholds.minFramesForClassicLA)
3416     {
3417         if (bStartOfStream)
3418         {
3419             //MaxRate is increased at the start of stream in the case of short LA to avoid cutting the fist frame.
3420             static mfxF64 k[] = { 5.0, 4.0,  3.0,  2.0, 1.95,  1.9, 1.85, 1.83, 1.8, 1.7,
3421                                   1.6, 1.55, 1.50, 1.45,1.35, 1.30, 1.25, 1.20, 1.15, 1.10, 1.0};
3422             MinRate = MaxRate = (m_currRate == m_initTargetRate) ?
3423                 m_initTargetRate * k[std::min(m_lookAhead, (mfxU32)(sizeof(k)/sizeof(k[0])-1))] :
3424                 (m_currRate + m_initTargetRate) / 2.0;
3425              MaxRate = std::max(MaxRate, m_initTargetRate);
3426              MinRate = std::min(MinRate, m_initTargetRate);
3427         }
3428         else
3429         {
3430             currRateLocal = GetTotalRate(m_laData, m_laDataStat, m_curBaseQp, m_laData.size(), m_first) / (m_laDataStat.size() + m_laData.size());
3431             if (currRateLocal > m_initTargetRate*criticalRatio(m_thresholds.maxRateRatioLocal))
3432             {
3433                 bLocalMin = true;
3434                 MaxRate = m_initTargetRate * criticalRatio(m_thresholds.maxRateRatioLocal);
3435             }
3436         }
3437     }
3438     else
3439     {
3440         mfxF64 rateCalcPeriod = std::min<mfxF64>(m_rateCalcPeriod, m_framesBehind);
3441         MinRate = MaxRate = std::max((m_initTargetRate *(rateCalcPeriod + m_costCalcPeriod) - m_currRate * rateCalcPeriod) / m_costCalcPeriod, m_initTargetRate/60.0);
3442     }
3443 
3444     mfxU8  minQp = m_QPMin[0];
3445     mfxU8  maxQp = m_QPMax[0];
3446     mfxI32 baseQP = mfx::clamp<mfxI32>(m_curBaseQp, m_QPMin[0], m_QPMax[0]);
3447     mfxF64 ratio = m_currRate / m_initTargetRate;
3448     mfxF64 ratioLocal = currRateLocal / m_initTargetRate;
3449 
3450     if (MaxRate)
3451     {
3452         minQp = SelectQp(m_laData, m_laDataStat, MaxRate * (m_laData.size() + m_laDataStat.size() - m_first), m_laData.size(), m_first);
3453     }
3454     else if ((ratio > m_thresholds.maxAvgRateRatio &&  ratioLocal > 1.00) || (ratioLocal > m_thresholds.maxRateRatioLocal))
3455     {
3456         minQp = std::max(minQp, (mfxU8)(baseQP + ((ratioLocal > criticalRatio(m_thresholds.maxAvgRateRatio)) ? 2 : 1)));
3457     }
3458 
3459     if (MinRate)
3460     {
3461         maxQp = SelectQp(m_laData, m_laDataStat, MinRate * (m_laData.size() + m_laDataStat.size() - m_first), m_laData.size(), m_first);
3462     }
3463     else  if ((ratio < m_thresholds.minAvgRateRatio && ratioLocal < 1.00) || (ratioLocal < m_thresholds.minRateRatioLocal))
3464     {
3465         maxQp = (mfxU8)(baseQP - 1);
3466     }
3467     if (m_AvgBitrate)
3468     {
3469         size_t framesForCheck = m_AvgBitrate->GetWindowSize() < (m_laData.size() - m_first) ? m_AvgBitrate->GetWindowSize() : (m_laData.size() - m_first);
3470         for (mfxU32 i = 1; i < framesForCheck; i++)
3471         {
3472            mfxF64 budget = mfxF64(m_AvgBitrate->GetBudget(i)) / (mfxF64(m_totNumMb));
3473            mfxU8  QP = SelectQp(m_laData, budget, i + m_first, m_first);
3474            if (minQp < QP)
3475            {
3476                minQp = QP;
3477                maxQp = maxQp > minQp ? maxQp : minQp;
3478            }
3479         }
3480     }
3481 
3482     if (m_hrd.get())
3483     {
3484        mfxF64 maxFrameSizeInBits =  m_hrd->GetMaxFrameSize(par.FrameType & MFX_FRAMETYPE_IDR);
3485        mfxF64 maxRatePerMb = maxFrameSizeInBits / mfxF64(m_totNumMb);
3486 
3487         for (size_t i = 0; i < m_laData.size() - m_first; i++)
3488         {
3489             mfxF64 budget = maxRatePerMb + i * m_initTargetRate;
3490             mfxU8  QP = SelectQp(m_laData, budget, 1 + i + m_first, m_first);
3491             if (minQp < QP)
3492             {
3493                 minQp = QP;
3494                 maxQp = maxQp > minQp ? maxQp : minQp;
3495             }
3496         }
3497     }
3498 
3499     if (m_curBaseQp < 0)
3500         m_curBaseQp = minQp; // first frame
3501     else if (m_curBaseQp < minQp && bLocalMin)
3502         m_curBaseQp = mfx::clamp<mfxI32>(minQp, m_curBaseQp - MAX_QP_CHANGE, m_curBaseQp + 2*MAX_QP_CHANGE);
3503     else if (m_curBaseQp < minQp)
3504         m_curBaseQp = mfx::clamp<mfxI32>(minQp, m_curBaseQp - MAX_QP_CHANGE, m_curBaseQp + MAX_QP_CHANGE);
3505     else if (m_curBaseQp > maxQp)
3506         m_curBaseQp = mfx::clamp<mfxI32>(maxQp, m_curBaseQp - MAX_QP_CHANGE, m_curBaseQp + MAX_QP_CHANGE);
3507     else
3508         ; // do not change qp if last qp guarantees target rate interval
3509     mfxU32 ind = GetFrameTypeIndex(par.FrameType);
3510     m_curQp = mfx::clamp<mfxI32>(m_curBaseQp + m_laData[m_first].deltaQp, m_QPMin[ind], m_QPMax[ind]);
3511 
3512     //printf("bqp=%2d qp=%2d dqp=%2d erate=%7.3f ", m_curBaseQp, m_curQp, m_laData[0].deltaQp, m_laData[0].estRateTotal[m_curQp]);
3513 
3514     frameCtrl.QpY = m_curQp;
3515 }
GetNewQP(mfxU32 size,mfxU32 targeSize,mfxU8 curQP)3516 mfxU8 GetNewQP(mfxU32 size, mfxU32 targeSize, mfxU8 curQP)
3517 {
3518     mfxF64 qstep     = MFX_QSTEP[std::min<mfxU8>(51, curQP)];
3519     mfxF64 qstep_new = qstep * pow((mfxF64)size / targeSize, 0.8);
3520     mfxU8  qp_new    = QStep2QpCeil(qstep_new);
3521 
3522     if (qp_new > 0 && qstep_new >(MFX_QSTEP[qp_new] + MFX_QSTEP[qp_new - 1]) / 2)
3523         --qp_new;
3524 
3525     return qp_new;
3526 }
GetQpForRecode(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)3527 void LookAheadBrc2::GetQpForRecode(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
3528 {
3529     mfxI32 qp = frameCtrl.QpY;
3530     if (m_maxFrameSizeForRec < par.CodedFrameSize)
3531     {
3532         qp = GetNewQP(par.CodedFrameSize, m_maxFrameSizeForRec, (mfxU8)frameCtrl.QpY);
3533     }
3534     if (qp <= frameCtrl.QpY)
3535         qp = frameCtrl.QpY + std::max<mfxI32>(1, mfxI32(par.NumRecode));
3536 
3537     mfxU32 ind = GetFrameTypeIndex(par.FrameType);
3538 
3539     frameCtrl.QpY =  mfx::clamp(qp, (mfxI32)m_QPMin[ind], (mfxI32)m_QPMax[ind]);
3540 }
3541 
SetQp(const BRCFrameParams &,mfxBRCFrameCtrl & frameCtrl)3542 void  LookAheadBrc2::SetQp(const BRCFrameParams& /*par*/, mfxBRCFrameCtrl &frameCtrl)
3543 {
3544     m_curQp = mfxU8(mfx::clamp<mfxU32>(frameCtrl.QpY, 1, 51));
3545 }
3546 
ClearStat(mfxU32 frameOrder)3547 void LookAheadBrc2::ClearStat(mfxU32 frameOrder)
3548 {
3549     if (m_costCalcPeriod == m_lookAhead)
3550         return; // for short LA only
3551 
3552     mfxU32 lastNewFrame = frameOrder + m_lookAhead;
3553     if ((m_laDataStat.size() > 1) &&
3554         (m_laDataStat[0].encOrder + m_costCalcPeriod < lastNewFrame || m_laDataStat.size() >= m_costCalcPeriod - m_lookAhead - 1))
3555         m_laDataStat.erase(m_laDataStat.begin(), m_laDataStat.begin() + 1); // old frames are removed
3556 }
3557 
SaveStat(mfxU32 frameOrder)3558 void LookAheadBrc2::SaveStat(mfxU32 frameOrder)
3559 {
3560     if (m_costCalcPeriod == m_lookAhead)
3561         return; // for short LA only
3562 
3563     for (size_t i = 0; i < m_laData.size() && m_laData[i].encOrder < frameOrder; i++)
3564     {
3565         if (m_laDataStat.empty()
3566             || (m_laData[i].encOrder > m_laDataStat.back().encOrder))
3567             m_laDataStat.push_back(m_laData[i]);
3568     }
3569 }
3570 
PreEnc(const BRCFrameParams & par,std::vector<VmeData * > const & vmeData)3571 void LookAheadBrc2::PreEnc(const BRCFrameParams& par, std::vector<VmeData *> const & vmeData)
3572 {
3573     MFX_AUTO_LTRACE(MFX_TRACE_LEVEL_INTERNAL, "LookAheadBrc2::PreEnc");
3574 
3575     m_first = 0;
3576     ClearStat(par.EncodedOrder);
3577     SaveStat(par.EncodedOrder);
3578 
3579     size_t i = 0;
3580     for (; i < m_laData.size(); i++)
3581         if (m_laData[i].encOrder == par.EncodedOrder)
3582         {
3583             break;
3584         }
3585     if (m_AsyncDepth && (i >= m_AsyncDepth))
3586     {
3587         i = i - m_AsyncDepth;
3588         m_first = 1;
3589     }
3590     m_laData.erase(m_laData.begin(), m_laData.begin() + i);
3591 
3592 
3593     mfxU32 firstNewFrame = m_laData.empty() ? par.EncodedOrder : m_laData.back().encOrder + 1;
3594     mfxU32 lastNewFrame  = par.EncodedOrder + m_lookAhead;
3595 
3596     for (i = 0; i < vmeData.size(); i++)
3597     {
3598         if (vmeData[i]->encOrder < firstNewFrame || vmeData[i]->encOrder >= lastNewFrame)
3599             continue;
3600 
3601         LaFrameData newData = {};
3602         newData.encOrder  = vmeData[i]->encOrder;
3603         newData.poc       = vmeData[i]->poc;
3604         newData.interCost = vmeData[i]->interCost;
3605         newData.intraCost = vmeData[i]->intraCost;
3606         newData.propCost  = vmeData[i]->propCost;
3607         newData.bframe    = vmeData[i]->pocL1 != mfxU32(-1);
3608         for (size_t j = 0; j < vmeData[i]->mb.size(); j++)
3609         {
3610             mfxF64 LaMultiplier = m_LaScaleFactor * m_LaScaleFactor;
3611             MbData const & mb = vmeData[i]->mb[j];
3612             if (mb.intraMbFlag)
3613             {
3614                 for (mfxU32 qp = 0; qp < 52; qp++)
3615                     newData.estRate[qp] += LaMultiplier * mb.dist / (MFX_QSTEP[qp] * INTRA_QSTEP_COEFF);
3616             }
3617             else
3618             {
3619                 mfxU32 skipQp = GetSkippedQp(mb);
3620                 for (mfxU32 qp = 0; qp < skipQp; qp++)
3621                     newData.estRate[qp] += LaMultiplier * mb.dist / (MFX_QSTEP[qp]);
3622             }
3623         }
3624         for (mfxU32 qp = 0; qp < 52; qp++)
3625             newData.estRate[qp] /= m_totNumMb;
3626         m_laData.push_back(newData);
3627     }
3628     assert(m_laData.size() <= m_lookAhead + m_AsyncDepth);
3629 }
3630 
3631 
PreEnc(const BRCFrameParams &,std::vector<VmeData * > const &)3632 void VMEBrc::PreEnc(const BRCFrameParams& /*par*/, std::vector<VmeData *> const & /*vmeData*/)
3633 {
3634 }
3635 
Report(const BRCFrameParams & par,mfxU32,mfxU32 maxFrameSize,mfxBRCFrameCtrl & frameCtrl)3636 mfxU32 LookAheadBrc2::Report(const BRCFrameParams& par, mfxU32 /* userDataLength */, mfxU32  maxFrameSize, mfxBRCFrameCtrl &frameCtrl)
3637 {
3638     MFX_AUTO_LTRACE(MFX_TRACE_LEVEL_INTERNAL, "LookAheadBrc2::Report");
3639     mfxF64 realRatePerMb = 8 * par.CodedFrameSize / mfxF64(m_totNumMb);
3640     mfxU32 maxFS = maxFrameSize? maxFrameSize*8 : 0xFFFFFFF;
3641 
3642     mfxI32 qp = mfx::clamp(frameCtrl.QpY, 1, 51);
3643 
3644     if ((m_skipped == 1) && ((par.FrameType & MFX_FRAMETYPE_B)!=0) && par.NumRecode < 100)
3645         return 3;  // skip mode for this frame
3646 
3647     m_skipped = (par.NumRecode < 100) ? 0 : 1;  //frame was skipped (panic mode)
3648                                          //we will skip all frames until next reference]
3649     if (m_AvgBitrate)
3650         maxFS = std::min(maxFS, m_AvgBitrate->GetMaxFrameSize(m_skipped > 0, (par.FrameType & MFX_FRAMETYPE_I) != 0, par.NumRecode));
3651 
3652     if ((8 * par.CodedFrameSize + 24) > maxFS)
3653     {
3654         m_maxFrameSizeForRec = maxFS / 8; // for recoding
3655         return 1;
3656     }
3657 
3658     if (m_AvgBitrate)
3659         m_AvgBitrate->UpdateSlidingWindow(8 * par.CodedFrameSize, par.EncodedOrder, m_skipped>0, (par.FrameType & MFX_FRAMETYPE_I)!=0, par.NumRecode, qp);
3660     if (m_hrd.get())
3661         m_hrd->RemoveAccessUnit(par.CodedFrameSize, 0, 0);
3662 
3663     m_framesBehind++;
3664     m_bitsBehind += realRatePerMb;
3665 
3666     mfxF64 rateCalcPeriod = (mfxF64)(std::min(m_rateCalcPeriod, m_framesBehind));
3667     m_currRate = ((rateCalcPeriod - 1.0)*m_currRate + realRatePerMb) / rateCalcPeriod;
3668 
3669     mfxF64 oldCoeff = m_rateCoeffHistory[qp].GetCoeff();
3670     mfxF64 y = std::max(0.0, realRatePerMb);
3671     mfxF64 x = m_laData[0].estRate[qp];
3672     mfxF64 minY = NORM_EST_RATE * INIT_RATE_COEFF[qp] * MIN_RATE_COEFF_CHANGE;
3673     mfxF64 maxY = NORM_EST_RATE * INIT_RATE_COEFF[qp] * MAX_RATE_COEFF_CHANGE;
3674     y = mfx::clamp(y / x * NORM_EST_RATE, minY, maxY);
3675     m_rateCoeffHistory[qp].Add(NORM_EST_RATE, y);
3676     mfxF64 ratio = m_rateCoeffHistory[qp].GetCoeff() / oldCoeff;
3677     mfxI32 signed_qp = qp;
3678     for (mfxI32 i = -m_qpUpdateRange; i <= m_qpUpdateRange; i++)
3679         if (i != 0 && signed_qp + i >= 0 && signed_qp + i < 52)
3680         {
3681             mfxF64 r = ((ratio - 1.0) * (1.0 - abs(i)/(m_qpUpdateRange + 1)) + 1.0);
3682             m_rateCoeffHistory[signed_qp + i].Add(NORM_EST_RATE,
3683                 NORM_EST_RATE * m_rateCoeffHistory[signed_qp + i].GetCoeff() * r);
3684         }
3685 
3686     brcprintf("rrate=%6.3f newCoeff=%5.3f\n", realRatePerMb, m_rateCoeffHistory[qp].GetCoeff());
3687 
3688     return 0;
3689 }
3690 
Report(const BRCFrameParams & par,mfxU32,mfxU32 maxFrameSize,mfxBRCFrameCtrl & frameCtrl)3691 mfxU32 VMEBrc::Report(const BRCFrameParams& par, mfxU32 /*userDataLength*/, mfxU32  maxFrameSize, mfxBRCFrameCtrl &frameCtrl)
3692 {
3693     MFX_AUTO_LTRACE(MFX_TRACE_LEVEL_INTERNAL, "LookAheadBrc2::Report");
3694     mfxF64 realRatePerMb = 8 * par.CodedFrameSize / mfxF64(m_totNumMb);
3695 
3696     mfxU32 maxFS = maxFrameSize ? maxFrameSize*8 : 0xFFFFFFF;
3697     mfxI32 qp = mfx::clamp(frameCtrl.QpY, 1, 51);
3698 
3699     if ((m_skipped == 1) && ((par.FrameType & MFX_FRAMETYPE_B)!=0) && par.NumRecode < 100)
3700         return 3;  // skip mode for this frame
3701 
3702     m_skipped = (par.NumRecode < 100) ? 0 : 1;  //frame was skipped (panic mode)
3703                                                 //we will skip all frames until next reference
3704     if (m_AvgBitrate)
3705         maxFS = std::min(maxFS, m_AvgBitrate->GetMaxFrameSize(m_skipped > 0, (par.FrameType & MFX_FRAMETYPE_I) != 0, par.NumRecode));
3706 
3707     if ((8 * par.CodedFrameSize + 24) > maxFS)
3708     {
3709         m_maxFrameSize = maxFS/8; // for recoding
3710         return 1;
3711     }
3712 
3713     if (m_AvgBitrate)
3714         m_AvgBitrate->UpdateSlidingWindow(8 * par.CodedFrameSize, par.EncodedOrder, m_skipped>0,(par.FrameType & MFX_FRAMETYPE_I)!=0, par.NumRecode, qp);
3715 
3716     m_framesBehind++;
3717     m_bitsBehind += realRatePerMb;
3718 
3719     std::list<LaFrameData>::iterator start = m_laData.begin();
3720     for(;start != m_laData.end(); ++start)
3721     {
3722         if ((*start).dispOrder == par.DisplayOrder)
3723             break;
3724     }
3725     mfxU32 numFrames = 0;
3726     for (std::list<LaFrameData>::iterator it = start; it != m_laData.end(); ++it)
3727         numFrames++;
3728 
3729     numFrames = std::min(numFrames, m_lookAhead);
3730 
3731     if (start != m_laData.end())
3732     {
3733 
3734         mfxF64 framesBeyond = mfxF64(std::max(2u, numFrames - 1) - 1);
3735 
3736         m_targetRateMax = (m_initTargetRate * (m_framesBehind + (m_lookAhead - 1)) - m_bitsBehind) / framesBeyond;
3737         m_targetRateMin = (m_initTargetRate * (m_framesBehind + (framesBeyond   )) - m_bitsBehind) / framesBeyond;
3738 
3739         //printf("Target: Max %f, Min %f, framesBeyond %f, m_framesBehind %d, m_bitsBehind %f, m_lookAhead %d, picOrder %d, m_laData[0] %d, delta %d, qp %d \n", m_targetRateMax, m_targetRateMin, framesBeyond, m_framesBehind, m_bitsBehind, m_lookAhead, picOrder, (*start).encOrder, (*start).deltaQp, qp);
3740 
3741         mfxF64 oldCoeff = m_rateCoeffHistory[qp].GetCoeff();
3742         mfxF64 y = std::max(0.0, realRatePerMb);
3743         mfxF64 x = (*start).estRate[qp];
3744         mfxF64 minY = NORM_EST_RATE * INIT_RATE_COEFF[qp] * MIN_RATE_COEFF_CHANGE;
3745         mfxF64 maxY = NORM_EST_RATE * INIT_RATE_COEFF[qp] * MAX_RATE_COEFF_CHANGE;
3746         y = mfx::clamp(y / x * NORM_EST_RATE, minY, maxY);
3747         m_rateCoeffHistory[qp].Add(NORM_EST_RATE, y);
3748 
3749         //static int count = 0;
3750         //count++;
3751         //if(FILE *dump = fopen("dump.txt", "a"))
3752         //{
3753         //    fprintf(dump, "%4d %4d %4d %4d %6f\n", count, frameType, dataLength, m_curQp, y);
3754         //    fclose(dump);
3755         //}
3756 
3757         mfxF64 ratio = m_rateCoeffHistory[qp].GetCoeff() / oldCoeff;
3758         mfxI32 signed_qp = qp;
3759         for (mfxI32 i = -m_qpUpdateRange; i <= m_qpUpdateRange; i++)
3760             if (i != 0 && signed_qp + i >= 0 && signed_qp + i < 52)
3761             {
3762                 mfxF64 r = ((ratio - 1.0) * (1.0 - abs(i)/(m_qpUpdateRange + 1)) + 1.0);
3763                 m_rateCoeffHistory[signed_qp + i].Add(NORM_EST_RATE,
3764                     NORM_EST_RATE * m_rateCoeffHistory[signed_qp + i].GetCoeff() * r);
3765             }
3766 
3767         brcprintf("rrate=%6.3f newCoeff=%5.3f\n", realRatePerMb, m_rateCoeffHistory[qp].GetCoeff());
3768         (*start).bNotUsed = 1;
3769     }
3770 
3771     return 0;
3772 }
3773 
GetQp(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)3774 void VMEBrc::GetQp(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
3775 {
3776     MFX_AUTO_LTRACE(MFX_TRACE_LEVEL_INTERNAL, "VMEBrc::GetQp");
3777 
3778     mfxF64 totalEstRate[52] = { 0.0 };
3779     if (!m_laData.size())
3780     {
3781         frameCtrl.QpY = 26;
3782         return;
3783     }
3784     std::list<LaFrameData>::iterator start = m_laData.begin();
3785     while (start != m_laData.end())
3786     {
3787         if ((*start).encOrder == par.EncodedOrder)
3788             break;
3789         ++start;
3790     }
3791 
3792     if (start == m_laData.end())
3793         return;
3794 
3795     std::list<LaFrameData>::iterator it = start;
3796     mfxU32 numberOfFrames = 0;
3797     for(it = start;it != m_laData.end(); ++it)
3798         numberOfFrames++;
3799 
3800     numberOfFrames = std::min(numberOfFrames, m_lookAhead);
3801 
3802 
3803     // fill totalEstRate
3804     it = start;
3805     for(mfxU32 i=0; i < numberOfFrames ; i++)
3806     {
3807         for (mfxU32 qp = 0; qp < 52; qp++)
3808         {
3809 
3810             (*it).estRateTotal[qp] = std::max(MIN_EST_RATE, m_rateCoeffHistory[qp].GetCoeff() * (*it).estRate[qp]);
3811             totalEstRate[qp] += (*it).estRateTotal[qp];
3812         }
3813         ++it;
3814     }
3815 
3816     mfxI32 maxDeltaQp = INT_MIN;
3817     if (m_lookAhead > 0)
3818     {
3819         mfxI32 curQp = m_curBaseQp < 0 ? SelectQp(totalEstRate, m_targetRateMin * numberOfFrames) : m_curBaseQp;
3820         mfxF64 strength = 0.03 * curQp + .75;
3821 
3822         it = start;
3823         for (mfxU32 i=0; i < numberOfFrames ; i++)
3824         {
3825             mfxU32 intraCost    = (*it).intraCost;
3826             mfxU32 interCost    = (*it).interCost;
3827             mfxU32 propCost     = (*it).propCost;
3828             mfxF64 ratio        = 1.0;//mfxF64(interCost) / intraCost;
3829             mfxF64 deltaQp      = log((intraCost + propCost * ratio) / intraCost) / log(2.0);
3830             (*it).deltaQp = (interCost >= intraCost * 0.9)
3831                 ? -mfxI32(deltaQp * 2 * strength + 0.5)
3832                 : -mfxI32(deltaQp * 1 * strength + 0.5);
3833             maxDeltaQp = std::max(maxDeltaQp, it->deltaQp);
3834             //printf("%d intra %d inter %d prop %d currQP %d delta %f(%d)\n", (*it).encOrder, intraCost/4, interCost/4, propCost/4, curQp, deltaQp, (*it).deltaQp );
3835             ++it;
3836         }
3837     }
3838     else
3839     {
3840         it = start;
3841         for (mfxU32 i=0; i < numberOfFrames ; i++)
3842         {
3843             mfxU32 intraCost    = (*it).intraCost;
3844             mfxU32 interCost    = (*it).interCost;
3845             (*it).deltaQp = (interCost >= intraCost * 0.9) ? -5 : (*it).bframe ? 0 : -2;
3846 
3847             maxDeltaQp = std::max(maxDeltaQp, it->deltaQp);
3848             ++it;
3849         }
3850     }
3851 
3852    it = start;
3853    for (mfxU32 i=0; i < numberOfFrames ; i++)
3854    {
3855         (*it).deltaQp -= maxDeltaQp;
3856         ++it;
3857    }
3858 
3859     mfxU8 minQp = SelectQp(start,m_laData.end(), m_targetRateMax * numberOfFrames);
3860     mfxU8 maxQp = SelectQp(start,m_laData.end(), m_targetRateMin * numberOfFrames);
3861 
3862 
3863     if (m_AvgBitrate)
3864     {
3865         size_t framesForCheck = m_AvgBitrate->GetWindowSize() < numberOfFrames ? m_AvgBitrate->GetWindowSize() : numberOfFrames;
3866         for (mfxU32 i = 1; i < framesForCheck; i ++)
3867         {
3868            mfxF64 budget = mfxF64(m_AvgBitrate->GetBudget(i))/(mfxF64(m_totNumMb));
3869            mfxU8  QP = SelectQp(start,m_laData.end(), budget, i);
3870            if (minQp <  QP)
3871            {
3872                minQp  = QP;
3873                maxQp = maxQp > minQp ? maxQp : minQp;
3874            }
3875         }
3876     }
3877 
3878     if (m_curBaseQp < 0)
3879         m_curBaseQp = minQp; // first frame
3880     else if (m_curBaseQp < minQp)
3881         m_curBaseQp = mfx::clamp<mfxI32>(minQp, m_curBaseQp - MAX_QP_CHANGE, m_curBaseQp + MAX_QP_CHANGE);
3882     else if (m_curQp > maxQp)
3883         m_curBaseQp = mfx::clamp<mfxI32>(maxQp, m_curBaseQp - MAX_QP_CHANGE, m_curBaseQp + MAX_QP_CHANGE);
3884     else
3885         ; // do not change qp if last qp guarantees target rate interval
3886 
3887     mfxU32 ind = GetFrameTypeIndex(par.FrameType);
3888     m_curQp = mfx::clamp<mfxI32>(m_curBaseQp + (*start).deltaQp, m_QPMin[ind], m_QPMax[ind]);
3889 
3890 
3891     brcprintf("bqp=%2d qp=%2d dqp=%2d erate=%7.3f ", m_curBaseQp, m_curQp, (*start).deltaQp, (*start).estRateTotal[m_curQp]);
3892 
3893     frameCtrl.QpY = mfxU8(m_curQp);
3894 }
GetQpForRecode(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)3895 void VMEBrc::GetQpForRecode(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
3896 {
3897     mfxI32 qp = frameCtrl.QpY;
3898     if (m_maxFrameSize > par.CodedFrameSize)
3899     {
3900         qp = GetNewQP(par.CodedFrameSize, m_maxFrameSize, (mfxU8)frameCtrl.QpY);
3901     }
3902     if (qp <= frameCtrl.QpY)
3903         qp = frameCtrl.QpY + std::max<mfxI32>(1, par.NumRecode);
3904 
3905     mfxU32 ind = GetFrameTypeIndex(par.FrameType);
3906 
3907     frameCtrl.QpY = mfx::clamp(qp, (mfxI32)m_QPMin[ind], (mfxI32)m_QPMax[ind]);
3908 }
Init(MfxVideoParam & video)3909 mfxStatus LookAheadCrfBrc::Init(MfxVideoParam  & video)
3910 {
3911     mfxExtCodingOption2 const & extOpt2 = GetExtBufferRef(video);
3912 
3913     m_lookAhead  = extOpt2.LookAheadDepth;
3914     m_crfQuality = video.mfx.ICQQuality;
3915     m_totNumMb   = video.mfx.FrameInfo.Width * video.mfx.FrameInfo.Height / 256;
3916 
3917     m_intraCost = 0;
3918     m_interCost = 0;
3919     m_propCost  = 0;
3920 
3921     SetMinMaxQP(extOpt2, m_QPMin, m_QPMax);
3922     return MFX_ERR_NONE;
3923 }
3924 
GetQp(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)3925 void LookAheadCrfBrc::GetQp(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
3926 {
3927     mfxF64 strength = 0.03 * m_crfQuality + .75;
3928     mfxF64 ratio    = 1.0;
3929     mfxF64 deltaQpF = log((m_intraCost + m_propCost * ratio) / m_intraCost) / log(2.0);
3930 
3931     mfxI32 deltaQp = (m_interCost >= m_intraCost * 0.9)
3932         ? -mfxI32(deltaQpF * 2 * strength + 0.5)
3933         : -mfxI32(deltaQpF * 1 * strength + 0.5);
3934 
3935     mfxU32 ind = GetFrameTypeIndex(par.FrameType);
3936     m_curQp = mfx::clamp<mfxI32>(m_crfQuality + deltaQp, m_QPMin[ind], m_QPMax[ind]); // driver doesn't support qp=0
3937 
3938     frameCtrl.QpY = mfxU8(m_curQp);
3939 }
GetQpForRecode(const BRCFrameParams & par,mfxBRCFrameCtrl & frameCtrl)3940 void LookAheadCrfBrc::GetQpForRecode(const BRCFrameParams& par, mfxBRCFrameCtrl &frameCtrl)
3941 {
3942     mfxI32 qp = frameCtrl.QpY + par.NumRecode;
3943 
3944     mfxU32 ind = GetFrameTypeIndex(par.FrameType);
3945 
3946     frameCtrl.QpY = mfx::clamp(qp, (mfxI32)m_QPMin[ind], (mfxI32)m_QPMax[ind]);
3947 }
PreEnc(const BRCFrameParams & par,std::vector<VmeData * > const & vmeData)3948 void LookAheadCrfBrc::PreEnc(const BRCFrameParams& par, std::vector<VmeData *> const & vmeData)
3949 {
3950     for (size_t i = 0; i < vmeData.size(); i++)
3951     {
3952         if (vmeData[i]->encOrder == par.EncodedOrder)
3953         {
3954             m_intraCost = vmeData[i]->intraCost;
3955             m_interCost = vmeData[i]->interCost;
3956             m_propCost =  vmeData[i]->propCost;
3957         }
3958     }
3959 }
3960 
Report(const BRCFrameParams &,mfxU32,mfxU32,mfxBRCFrameCtrl &)3961 mfxU32 LookAheadCrfBrc::Report(const BRCFrameParams& /*par*/,  mfxU32 /*userDataLength*/, mfxU32 /* maxFrameSize */, mfxBRCFrameCtrl &/*frameCtrl*/)
3962 {
3963     return 0;
3964 }
3965 
Hrd()3966 Hrd::Hrd()
3967     :m_bitrate(0)
3968     , m_rcMethod(0)
3969     , m_hrdIn90k(0)
3970     , m_tick(0)
3971     , m_trn_cur(0)
3972     , m_taf_prv(0)
3973     , m_bIsHrdRequired(false)
3974 {
3975 }
3976 
Setup(MfxVideoParam const & par)3977 void Hrd::Setup(MfxVideoParam const & par)
3978 {
3979     mfxExtCodingOption & opts = GetExtBufferRef(par);
3980     if (!IsOn(opts.NalHrdConformance))
3981     {
3982         // hrd control isn't required for BRC methods above
3983         m_bIsHrdRequired = false;
3984         return;
3985     }
3986 
3987     m_bIsHrdRequired = true;
3988 
3989     m_rcMethod = par.mfx.RateControlMethod;
3990     if (m_rcMethod != MFX_RATECONTROL_CBR &&
3991         m_rcMethod != MFX_RATECONTROL_VBR &&
3992         m_rcMethod != MFX_RATECONTROL_WIDI_VBR)
3993         m_rcMethod = MFX_RATECONTROL_VBR;
3994 
3995 // MVC BD {
3996     // for ViewOutput mode HRD should be controlled for every view separately
3997 
3998     if (IsMvcProfile(par.mfx.CodecProfile) && opts.ViewOutput == MFX_CODINGOPTION_ON)
3999     {
4000         m_bitrate  = GetMaxBitrateValue(par.calcParam.mvcPerViewPar.maxKbps) << (6 + SCALE_FROM_DRIVER);
4001         m_hrdIn90k = mfxU32(8000.0 * par.calcParam.mvcPerViewPar.bufferSizeInKB / m_bitrate * 90000.0);
4002     }
4003     else
4004     {
4005         m_bitrate  = GetMaxBitrateValue(par.calcParam.maxKbps) << (6 + SCALE_FROM_DRIVER);
4006         m_hrdIn90k = mfxU32(8000.0 * par.calcParam.bufferSizeInKB / m_bitrate * 90000.0);
4007     }
4008 // MVC BD }
4009     m_tick     = 0.5 * par.mfx.FrameInfo.FrameRateExtD / par.mfx.FrameInfo.FrameRateExtN;
4010 
4011     m_taf_prv = 0.0;
4012 // MVC BD {
4013     if (IsMvcProfile(par.mfx.CodecProfile) && opts.ViewOutput == MFX_CODINGOPTION_ON)
4014         m_trn_cur = double(8000) * par.calcParam.mvcPerViewPar.initialDelayInKB / m_bitrate;
4015     else
4016         m_trn_cur = double(8000) * par.calcParam.initialDelayInKB / m_bitrate;
4017 // MVC BD }
4018 
4019     m_trn_cur = GetInitCpbRemovalDelay() / 90000.0;
4020 }
4021 
Reset(MfxVideoParam const & par)4022 void Hrd::Reset(MfxVideoParam const & par)
4023 {
4024     if (m_bIsHrdRequired == false)
4025         return;
4026 
4027     m_bitrate  = GetMaxBitrateValue(par.calcParam.maxKbps) << (6 + SCALE_FROM_DRIVER);
4028     m_hrdIn90k = mfxU32(8000.0 * par.calcParam.bufferSizeInKB / m_bitrate * 90000.0);
4029 }
4030 
RemoveAccessUnit(mfxU32 size,mfxU32 interlace,mfxU32 bufferingPeriod)4031 void Hrd::RemoveAccessUnit(mfxU32 size, mfxU32 interlace, mfxU32 bufferingPeriod)
4032 {
4033     if (m_bIsHrdRequired == false)
4034         return;
4035 
4036     mfxU32 initDelay = GetInitCpbRemovalDelay();
4037 
4038     double tai_earliest = bufferingPeriod
4039         ? m_trn_cur - (initDelay / 90000.0)
4040         : m_trn_cur - (m_hrdIn90k / 90000.0);
4041 
4042     double tai_cur = (m_rcMethod == MFX_RATECONTROL_VBR)
4043         ? std::max(m_taf_prv, tai_earliest)
4044         : m_taf_prv;
4045 
4046     m_taf_prv = tai_cur + double(8) * size / m_bitrate;
4047     m_trn_cur += m_tick * (interlace ? 1 : 2);
4048 }
4049 
GetInitCpbRemovalDelay() const4050 mfxU32 Hrd::GetInitCpbRemovalDelay() const
4051 {
4052     if (m_bIsHrdRequired == false)
4053         return 0;
4054 
4055     double delay = std::max(0.0, m_trn_cur - m_taf_prv);
4056     mfxU32 initialCpbRemovalDelay = mfxU32(90000 * delay + 0.5);
4057 
4058     return initialCpbRemovalDelay == 0
4059         ? 1 // should not be equal to 0
4060         : initialCpbRemovalDelay > m_hrdIn90k && m_rcMethod == MFX_RATECONTROL_VBR
4061             ? m_hrdIn90k // should not exceed hrd buffer
4062             : initialCpbRemovalDelay;
4063 }
4064 
GetInitCpbRemovalDelayOffset() const4065 mfxU32 Hrd::GetInitCpbRemovalDelayOffset() const
4066 {
4067     if (m_bIsHrdRequired == false)
4068         return 0;
4069 
4070     // init_cpb_removal_delay + init_cpb_removal_delay_offset should be constant
4071     return m_hrdIn90k - GetInitCpbRemovalDelay();
4072 }
GetMaxFrameSize(mfxU32 bufferingPeriod) const4073 mfxU32 Hrd::GetMaxFrameSize(mfxU32 bufferingPeriod) const
4074 {
4075     if (m_bIsHrdRequired == false)
4076         return 0;
4077 
4078     mfxU32 initDelay = GetInitCpbRemovalDelay();
4079 
4080     double tai_earliest = (bufferingPeriod)
4081         ? m_trn_cur - (initDelay / 90000.0)
4082         : m_trn_cur - (m_hrdIn90k / 90000.0);
4083 
4084     double tai_cur = (m_rcMethod == MFX_RATECONTROL_VBR)
4085         ? std::max(m_taf_prv, tai_earliest)
4086         : m_taf_prv;
4087 
4088     mfxU32 maxFrameSize = (mfxU32)((m_trn_cur - tai_cur)*m_bitrate);
4089     //printf("MaxFrame %d, tai_cur %f, max_taf_cur %f\n", maxFrameSize, tai_cur,  tai_cur + (mfxF64)maxFrameSize / m_bitrate);
4090     return  maxFrameSize;
4091 }
4092 
4093 
4094 
4095 
InputBitstream(mfxU8 const * buf,size_t size,bool hasStartCode,bool doEmulationControl)4096 InputBitstream::InputBitstream(
4097     mfxU8 const * buf,
4098     size_t        size,
4099     bool          hasStartCode,
4100     bool          doEmulationControl)
4101 : m_buf(buf)
4102 , m_ptr(buf)
4103 , m_bufEnd(buf + size)
4104 , m_bitOff(0)
4105 , m_doEmulationControl(doEmulationControl)
4106 {
4107     if (hasStartCode)
4108         m_ptr = m_buf = SkipStartCode(m_buf, m_bufEnd);
4109 }
4110 
InputBitstream(mfxU8 const * buf,mfxU8 const * bufEnd,bool hasStartCode,bool doEmulationControl)4111 InputBitstream::InputBitstream(
4112     mfxU8 const * buf,
4113     mfxU8 const * bufEnd,
4114     bool          hasStartCode,
4115     bool          doEmulationControl)
4116 : m_buf(buf)
4117 , m_ptr(buf)
4118 , m_bufEnd(bufEnd)
4119 , m_bitOff(0)
4120 , m_doEmulationControl(doEmulationControl)
4121 {
4122     if (hasStartCode)
4123         m_ptr = m_buf = SkipStartCode(m_buf, m_bufEnd);
4124 }
4125 
NumBitsRead() const4126 mfxU32 InputBitstream::NumBitsRead() const
4127 {
4128     return mfxU32(8 * (m_ptr - m_buf) + m_bitOff);
4129 }
4130 
NumBitsLeft() const4131 mfxU32 InputBitstream::NumBitsLeft() const
4132 {
4133     return mfxU32(8 * (m_bufEnd - m_ptr) - m_bitOff);
4134 }
4135 
GetBit()4136 mfxU32 InputBitstream::GetBit()
4137 {
4138     if (m_ptr >= m_bufEnd)
4139         throw EndOfBuffer();
4140 
4141     mfxU32 bit = (*m_ptr >> (7 - m_bitOff)) & 1;
4142 
4143     if (++m_bitOff == 8)
4144     {
4145         ++m_ptr;
4146         m_bitOff = 0;
4147 
4148         if (m_doEmulationControl &&
4149             m_ptr - m_buf >= 2 && (m_bufEnd - m_ptr) >= 1 &&
4150             *m_ptr == 0x3 && *(m_ptr - 1) == 0 && *(m_ptr - 2) == 0 && (*(m_ptr + 1) & 0xfc) == 0)
4151         {
4152             ++m_ptr; // skip start code emulation prevention byte (0x03)
4153         }
4154     }
4155 
4156     return bit;
4157 }
4158 
GetBits(mfxU32 nbits)4159 mfxU32 InputBitstream::GetBits(mfxU32 nbits)
4160 {
4161     mfxU32 bits = 0;
4162     for (; nbits > 0; --nbits)
4163     {
4164         bits <<= 1;
4165         bits |= GetBit();
4166     }
4167 
4168     return bits;
4169 }
4170 
GetUe()4171 mfxU32 InputBitstream::GetUe()
4172 {
4173     mfxU32 zeroes = 0;
4174     while (GetBit() == 0)
4175         ++zeroes;
4176 
4177     return zeroes == 0 ? 0 : ((1 << zeroes) | GetBits(zeroes)) - 1;
4178 }
4179 
GetSe()4180 mfxI32 InputBitstream::GetSe()
4181 {
4182     mfxU32 val = GetUe();
4183     mfxU32 sign = (val & 1);
4184     val = (val + 1) >> 1;
4185     return sign ? val : -mfxI32(val);
4186 }
4187 
OutputBitstream(mfxU8 * buf,size_t size,bool emulationControl)4188 OutputBitstream::OutputBitstream(mfxU8 * buf, size_t size, bool emulationControl)
4189 : m_buf(buf)
4190 , m_ptr(buf)
4191 , m_bufEnd(buf + size)
4192 , m_bitOff(0)
4193 , m_emulationControl(emulationControl)
4194 {
4195     if (m_ptr < m_bufEnd)
4196         *m_ptr = 0; // clear next byte
4197 }
4198 
OutputBitstream(mfxU8 * buf,mfxU8 * bufEnd,bool emulationControl)4199 OutputBitstream::OutputBitstream(mfxU8 * buf, mfxU8 * bufEnd, bool emulationControl)
4200 : m_buf(buf)
4201 , m_ptr(buf)
4202 , m_bufEnd(bufEnd)
4203 , m_bitOff(0)
4204 , m_emulationControl(emulationControl)
4205 {
4206     if (m_ptr < m_bufEnd)
4207         *m_ptr = 0; // clear next byte
4208 }
4209 
GetNumBits() const4210 mfxU32 OutputBitstream::GetNumBits() const
4211 {
4212     return mfxU32(8 * (m_ptr - m_buf) + m_bitOff);
4213 }
4214 
PutBit(mfxU32 bit)4215 void OutputBitstream::PutBit(mfxU32 bit)
4216 {
4217     if (m_ptr >= m_bufEnd)
4218         throw EndOfBuffer();
4219 
4220     mfxU8 mask = mfxU8(0xff << (8 - m_bitOff));
4221     mfxU8 newBit = mfxU8((bit & 1) << (7 - m_bitOff));
4222     *m_ptr = (*m_ptr & mask) | newBit;
4223 
4224     if (++m_bitOff == 8)
4225     {
4226         if (m_emulationControl && m_ptr - 2 >= m_buf &&
4227             (*m_ptr & 0xfc) == 0 && *(m_ptr - 1) == 0 && *(m_ptr - 2) == 0)
4228         {
4229             if (m_ptr + 1 >= m_bufEnd)
4230                 throw EndOfBuffer();
4231 
4232             *(m_ptr + 1) = *(m_ptr + 0);
4233             *(m_ptr + 0) = 0x03;
4234             m_ptr++;
4235         }
4236 
4237         m_bitOff = 0;
4238         m_ptr++;
4239         if (m_ptr < m_bufEnd)
4240             *m_ptr = 0; // clear next byte
4241     }
4242 }
4243 
PutBits(mfxU32 val,mfxU32 nbits)4244 void OutputBitstream::PutBits(mfxU32 val, mfxU32 nbits)
4245 {
4246     assert(nbits <= 32);
4247 
4248     for (; nbits > 0; nbits--)
4249         PutBit((val >> (nbits - 1)) & 1);
4250 }
4251 
PutUe(mfxU32 val)4252 void OutputBitstream::PutUe(mfxU32 val)
4253 {
4254     if (val == 0)
4255     {
4256         PutBit(1);
4257     }
4258     else
4259     {
4260         val++;
4261         mfxU32 nbits = 1;
4262         while (val >> nbits)
4263             nbits++;
4264 
4265         PutBits(0, nbits - 1);
4266         PutBits(val, nbits);
4267     }
4268 }
4269 
PutSe(mfxI32 val)4270 void OutputBitstream::PutSe(mfxI32 val)
4271 {
4272     (val <= 0)
4273         ? PutUe(-2 * val)
4274         : PutUe( 2 * val - 1);
4275 }
4276 
PutTrailingBits()4277 void OutputBitstream::PutTrailingBits()
4278 {
4279     PutBit(1);
4280     while (m_bitOff != 0)
4281         PutBit(0);
4282 }
4283 
PutRawBytes(mfxU8 const * begin,mfxU8 const * end)4284 void OutputBitstream::PutRawBytes(mfxU8 const * begin, mfxU8 const * end)
4285 {
4286     assert(m_bitOff == 0);
4287 
4288     if (m_bufEnd - m_ptr < end - begin)
4289         throw EndOfBuffer();
4290 
4291     MFX_INTERNAL_CPY(m_ptr, begin, (uint32_t)(end - begin));
4292     m_bitOff = 0;
4293     m_ptr += end - begin;
4294 
4295     if (m_ptr < m_bufEnd)
4296         *m_ptr = 0;
4297 }
4298 
PutFillerBytes(mfxU8 filler,mfxU32 nbytes)4299 void OutputBitstream::PutFillerBytes(mfxU8 filler, mfxU32 nbytes)
4300 {
4301     assert(m_bitOff == 0);
4302 
4303     if (m_ptr + nbytes > m_bufEnd)
4304         throw EndOfBuffer();
4305 
4306     memset(m_ptr, filler, nbytes);
4307     m_ptr += nbytes;
4308 }
4309 
PutSeiHeader(OutputBitstream & bs,mfxU32 payloadType,mfxU32 payloadSize)4310 void MfxHwH264Encode::PutSeiHeader(
4311     OutputBitstream & bs,
4312     mfxU32            payloadType,
4313     mfxU32            payloadSize)
4314 {
4315     while (payloadType >= 255)
4316     {
4317         bs.PutBits(0xff, 8);
4318         payloadType -= 255;
4319     }
4320 
4321     bs.PutBits(payloadType, 8);
4322 
4323     while (payloadSize >= 255)
4324     {
4325         bs.PutBits(0xff, 8);
4326         payloadSize -= 255;
4327     }
4328 
4329     bs.PutBits(payloadSize, 8);
4330 }
4331 
PutSeiMessage(OutputBitstream & bs,mfxExtAvcSeiBufferingPeriod const & msg)4332 void MfxHwH264Encode::PutSeiMessage(
4333     OutputBitstream &                   bs,
4334     mfxExtAvcSeiBufferingPeriod const & msg)
4335 {
4336     mfxU32 const dataSizeInBytes = CalculateSeiSize(msg);
4337 
4338     PutSeiHeader(bs, SEI_TYPE_BUFFERING_PERIOD, dataSizeInBytes);
4339     bs.PutUe(msg.seq_parameter_set_id);
4340 
4341     for (mfxU32 i = 0; i < msg.nal_cpb_cnt; i++)
4342     {
4343         bs.PutBits(msg.nal_initial_cpb_removal_delay[i], msg.initial_cpb_removal_delay_length);
4344         bs.PutBits(msg.nal_initial_cpb_removal_delay_offset[i], msg.initial_cpb_removal_delay_length);
4345     }
4346 
4347     for (mfxU32 i = 0; i < msg.vcl_cpb_cnt; i++)
4348     {
4349         bs.PutBits(msg.vcl_initial_cpb_removal_delay[i], msg.initial_cpb_removal_delay_length);
4350         bs.PutBits(msg.vcl_initial_cpb_removal_delay_offset[i], msg.initial_cpb_removal_delay_length);
4351     }
4352 
4353     if (bs.GetNumBits() & 7)
4354     {
4355         bs.PutBit(1);
4356         while (bs.GetNumBits() & 7)
4357             bs.PutBit(0);
4358     }
4359 }
4360 
PutSeiMessage(OutputBitstream & bs,mfxExtPictureTimingSEI const & extPt,mfxExtAvcSeiPicTiming const & msg)4361 void MfxHwH264Encode::PutSeiMessage(
4362     OutputBitstream &              bs,
4363     mfxExtPictureTimingSEI const & extPt,
4364     mfxExtAvcSeiPicTiming const &  msg)
4365 {
4366     mfxU32 const dataSizeInBytes = CalculateSeiSize(extPt, msg);
4367 
4368     PutSeiHeader(bs, SEI_TYPE_PIC_TIMING, dataSizeInBytes);
4369 
4370     if (msg.cpb_dpb_delays_present_flag)
4371     {
4372         bs.PutBits(msg.cpb_removal_delay, msg.cpb_removal_delay_length);
4373         bs.PutBits(msg.dpb_output_delay, msg.dpb_output_delay_length);
4374     }
4375 
4376     if (msg.pic_struct_present_flag)
4377     {
4378         assert(msg.pic_struct <= 8);
4379         mfxU32 numClockTS = NUM_CLOCK_TS[std::min<mfxU8>(msg.pic_struct, 8)];
4380 
4381         bs.PutBits(msg.pic_struct, 4);
4382         for (mfxU32 i = 0; i < numClockTS; i ++)
4383         {
4384             bs.PutBit(extPt.TimeStamp[i].ClockTimestampFlag);
4385             if (extPt.TimeStamp[i].ClockTimestampFlag)
4386             {
4387                 mfxU32 ctType = (extPt.TimeStamp[i].CtType == 0xffff)
4388                     ? msg.ct_type                // based on picstruct
4389                     : extPt.TimeStamp[i].CtType; // user-defined
4390 
4391                 bs.PutBits(ctType, 2);
4392                 bs.PutBit (extPt.TimeStamp[i].NuitFieldBasedFlag);
4393                 bs.PutBits(extPt.TimeStamp[i].CountingType, 5);
4394                 bs.PutBit (extPt.TimeStamp[i].FullTimestampFlag);
4395                 bs.PutBit (extPt.TimeStamp[i].DiscontinuityFlag);
4396                 bs.PutBit (extPt.TimeStamp[i].CntDroppedFlag);
4397                 bs.PutBits(extPt.TimeStamp[i].NFrames, 8);
4398 
4399                 if (extPt.TimeStamp[i].FullTimestampFlag)
4400                 {
4401                     bs.PutBits(extPt.TimeStamp[i].SecondsValue, 6);
4402                     bs.PutBits(extPt.TimeStamp[i].MinutesValue, 6);
4403                     bs.PutBits(extPt.TimeStamp[i].HoursValue,   5);
4404                 }
4405                 else
4406                 {
4407                     bs.PutBit(extPt.TimeStamp[i].SecondsFlag);
4408                     if (extPt.TimeStamp[i].SecondsFlag)
4409                     {
4410                         bs.PutBits(extPt.TimeStamp[i].SecondsValue, 6);
4411                         bs.PutBit(extPt.TimeStamp[i].MinutesFlag);
4412                         if (extPt.TimeStamp[i].MinutesFlag)
4413                         {
4414                             bs.PutBits(extPt.TimeStamp[i].MinutesValue, 6);
4415                             bs.PutBit(extPt.TimeStamp[i].HoursFlag);
4416                             if (extPt.TimeStamp[i].HoursFlag)
4417                                 bs.PutBits(extPt.TimeStamp[i].HoursValue, 5);
4418                         }
4419                     }
4420                 }
4421 
4422                 bs.PutBits(extPt.TimeStamp[i].TimeOffset, msg.time_offset_length);
4423             }
4424         }
4425     }
4426 
4427     if (bs.GetNumBits() & 7)
4428     {
4429         bs.PutBit(1);
4430         while (bs.GetNumBits() & 7)
4431             bs.PutBit(0);
4432     }
4433 }
4434 
PutSeiMessage(OutputBitstream & bs,mfxExtAvcSeiDecRefPicMrkRep const & msg)4435 void MfxHwH264Encode::PutSeiMessage(
4436     OutputBitstream &                   bs,
4437     mfxExtAvcSeiDecRefPicMrkRep const & msg)
4438 {
4439     mfxU32 const dataSizeInBytes = CalculateSeiSize(msg);
4440 
4441     PutSeiHeader(bs, SEI_TYPE_DEC_REF_PIC_MARKING_REPETITION, dataSizeInBytes);
4442 
4443     bs.PutBit(msg.original_idr_flag);
4444     bs.PutUe(msg.original_frame_num);
4445     if (msg.original_field_info_present_flag)
4446     {
4447         bs.PutBit(msg.original_field_pic_flag);
4448         if (msg.original_field_pic_flag)
4449             bs.PutBit(msg.original_bottom_field_flag);
4450     }
4451 
4452     // put dec_ref_pic_marking() syntax
4453     if (msg.original_idr_flag) {
4454         bs.PutBit(msg.no_output_of_prior_pics_flag);
4455         bs.PutBit(msg.long_term_reference_flag);
4456     }
4457     else {
4458         bs.PutBit(msg.adaptive_ref_pic_marking_mode_flag);
4459         for (mfxU32 i = 0; i < msg.num_mmco_entries; i ++) {
4460             bs.PutUe(msg.mmco[i]);
4461             bs.PutUe(msg.value[2 * i]);
4462             if (msg.mmco[i] == 3)
4463                 bs.PutUe(msg.value[2 * i + 1]);
4464         }
4465     }
4466 
4467     if (bs.GetNumBits() & 7)
4468     {
4469         bs.PutBit(1);
4470         while (bs.GetNumBits() & 7)
4471             bs.PutBit(0);
4472     }
4473 }
4474 
PutSeiMessage(OutputBitstream & bs,mfxExtAvcSeiRecPoint const & msg)4475 void MfxHwH264Encode::PutSeiMessage(
4476         OutputBitstream &    bs,
4477         mfxExtAvcSeiRecPoint const & msg)
4478 {
4479     mfxU32 const dataSizeInBytes = CalculateSeiSize(msg);
4480 
4481     PutSeiHeader(bs, SEI_TYPE_RECOVERY_POINT, dataSizeInBytes);
4482 
4483     bs.PutUe(msg.recovery_frame_cnt);
4484     bs.PutBit(msg.exact_match_flag);
4485     bs.PutBit(msg.broken_link_flag);
4486     bs.PutBits(msg.changing_slice_group_idc, 2);
4487 
4488     if (bs.GetNumBits() & 7)
4489     {
4490         bs.PutBit(1);
4491         while (bs.GetNumBits() & 7)
4492             bs.PutBit(0);
4493     }
4494 }
4495 
4496 
4497 // MVC BD {
PutSeiMessage(OutputBitstream & bs,mfxU32 needBufferingPeriod,mfxU32 needPicTimingSei,mfxU32 fillerSize,MfxVideoParam const & video,mfxExtAvcSeiBufferingPeriod const & msg_bp,mfxExtPictureTimingSEI const & extPt,mfxExtAvcSeiPicTiming const & msg_pt)4498 void MfxHwH264Encode::PutSeiMessage(
4499     OutputBitstream &                   bs,
4500     mfxU32 needBufferingPeriod,
4501     mfxU32 needPicTimingSei,
4502     mfxU32 fillerSize,
4503     MfxVideoParam const & video,
4504     mfxExtAvcSeiBufferingPeriod const & msg_bp,
4505     mfxExtPictureTimingSEI const & extPt,
4506     mfxExtAvcSeiPicTiming const &  msg_pt)
4507 {
4508 
4509     if (needBufferingPeriod == 0 && needPicTimingSei == 0 && fillerSize == 0)
4510         return;
4511 
4512     mfxExtMVCSeqDesc & extMvc = GetExtBufferRef(video);
4513 
4514     mfxU32 dataSizeInBytes = 2; // hardcoded 2 bytes for MVC nested SEI syntax prior sei_messages (1 view in op)
4515 
4516     if (needBufferingPeriod)
4517     {
4518         dataSizeInBytes += 2; // hardcoded 2 bytes on BP sei_message() header. TODO: calculate real size of header
4519         dataSizeInBytes += CalculateSeiSize(msg_bp); // calculate size of BP SEI payload
4520     }
4521 
4522     if (needPicTimingSei)
4523     {
4524         dataSizeInBytes += 2; // hardcoded 2 bytes on PT sei_message() header. TODO: calculate real size of header
4525         dataSizeInBytes += CalculateSeiSize(extPt, msg_pt); // calculate size of PT SEI payload
4526     }
4527 
4528     if (fillerSize)
4529     {
4530         fillerSize -= fillerSize / 256; // compensate part of header
4531         dataSizeInBytes += 1; // last_payload_type_byte for filler payload
4532         dataSizeInBytes += (fillerSize + 254) / 255; // ff_bytes + last_payload_size_byte
4533         dataSizeInBytes += fillerSize; // filler payload
4534     }
4535 
4536     PutSeiHeader(bs, SEI_TYPE_MVC_SCALABLE_NESTING, dataSizeInBytes);
4537 
4538     bs.PutBit(1); // put operation_point_flag = 1
4539     bs.PutUe(0); // put num_view_components_op_minus1 = 0
4540     bs.PutBits(extMvc.View[1].ViewId, 10); // put sei_op_view_id[0]
4541     bs.PutBits(0, 3); // sei_op_temporal_id
4542     bs.PutBits(0, 1); // put sei_nesting_zero_bits
4543 
4544     if (needBufferingPeriod)
4545         PutSeiMessage(bs, msg_bp);
4546     if (needPicTimingSei)
4547         PutSeiMessage(bs, extPt, msg_pt);
4548     if (fillerSize)
4549     {
4550         // how many bytes takes to encode payloadSize depends on size of sei message
4551         // need to compensate it
4552         PutSeiHeader(bs, SEI_TYPE_FILLER_PAYLOAD, fillerSize);
4553         bs.PutFillerBytes(0xff, fillerSize);
4554     }
4555 
4556 }
4557 // MVC BD }
4558 
MfxFrameAllocResponse()4559 MfxFrameAllocResponse::MfxFrameAllocResponse()
4560     : m_cmDestroy(0)
4561     , m_core(0)
4562     , m_cmDevice(0)
4563     , m_numFrameActualReturnedByAllocFrames(0)
4564 {
4565     Zero((mfxFrameAllocResponse &)*this);
4566 }
4567 
~MfxFrameAllocResponse()4568 MfxFrameAllocResponse::~MfxFrameAllocResponse()
4569 {
4570     if (m_core)
4571     {
4572         if (MFX_HW_D3D11  == m_core->GetVAType() && m_responseQueue.size())
4573         {
4574             for (size_t i = 0; i < m_responseQueue.size(); i++)
4575                 m_core->FreeFrames(&m_responseQueue[i]);
4576         }
4577         else
4578         {
4579             if (mids)
4580             {
4581                 NumFrameActual = m_numFrameActualReturnedByAllocFrames;
4582                 m_core->FreeFrames(this);
4583             }
4584             for (size_t i = 0; i < m_sysmems.size(); i++)
4585             {
4586                 if (m_sysmems[i])
4587                 {
4588                     CM_ALIGNED_FREE(m_sysmems[i]);
4589                     m_sysmems[i] = 0;
4590                 }
4591             }
4592         }
4593     }
4594 
4595     if (m_cmDevice)
4596     {
4597         for (size_t i = 0; i < m_mids.size(); i++)
4598             if (m_mids[i])
4599             {
4600                 m_cmDestroy(m_cmDevice, m_mids[i]);
4601                 m_mids[i] = 0;
4602             }
4603 
4604         for (size_t i = 0; i < m_sysmems.size(); i++)
4605         {
4606             if (m_sysmems[i])
4607             {
4608                 CM_ALIGNED_FREE(m_sysmems[i]);
4609                 m_sysmems[i] = 0;
4610             }
4611         }
4612     }
4613 }
4614 
DestroyBuffer(CmDevice * device,void * p)4615 void MfxFrameAllocResponse::DestroyBuffer(CmDevice * device, void * p)
4616 {
4617     device->DestroySurface((CmBuffer *&)p);
4618 }
4619 
DestroySurface(CmDevice * device,void * p)4620 void MfxFrameAllocResponse::DestroySurface(CmDevice * device, void * p)
4621 {
4622     device->DestroySurface((CmSurface2D *&)p);
4623 }
4624 
DestroySurface2DUP(CmDevice * device,void * p)4625 void MfxFrameAllocResponse::DestroySurface2DUP(CmDevice * device, void * p)
4626 {
4627     device->DestroySurface2DUP((CmSurface2DUP *&)p);
4628 }
4629 
DestroyBufferUp(CmDevice * device,void * p)4630 void MfxFrameAllocResponse::DestroyBufferUp(CmDevice * device, void * p)
4631 {
4632     device->DestroyBufferUP((CmBufferUP *&)p);
4633 }
4634 
Alloc(VideoCORE * core,mfxFrameAllocRequest & req,bool isCopyRequired,bool isAllFramesRequired)4635 mfxStatus MfxFrameAllocResponse::Alloc(
4636     VideoCORE *            core,
4637     mfxFrameAllocRequest & req,
4638     bool isCopyRequired,
4639     bool isAllFramesRequired)
4640 {
4641     if (m_core || m_cmDevice)
4642         return Error(MFX_ERR_MEMORY_ALLOC);
4643 
4644     req.NumFrameSuggested = req.NumFrameMin; // no need in 2 different NumFrames
4645 
4646     if (MFX_HW_D3D11  == core->GetVAType())
4647     {
4648         mfxFrameAllocRequest tmp = req;
4649         tmp.NumFrameMin = tmp.NumFrameSuggested = 1;
4650 
4651         m_responseQueue.resize(req.NumFrameMin);
4652         m_mids.resize(req.NumFrameMin);
4653 
4654         for (int i = 0; i < req.NumFrameMin; i++)
4655         {
4656             mfxStatus sts = core->AllocFrames(&tmp, &m_responseQueue[i],isCopyRequired);
4657             MFX_CHECK_STS(sts);
4658             m_mids[i] = m_responseQueue[i].mids[0];
4659         }
4660 
4661         mids = &m_mids[0];
4662         NumFrameActual = req.NumFrameMin;
4663     }
4664     else
4665     {
4666         mfxStatus sts = core->AllocFrames(&req, this,isCopyRequired);
4667         MFX_CHECK_STS(sts);
4668     }
4669 
4670     if (NumFrameActual < req.NumFrameMin)
4671         return MFX_ERR_MEMORY_ALLOC;
4672 
4673     m_locked.resize(req.NumFrameMin, 0);
4674     m_flag.resize(req.NumFrameMin, 0);
4675     std::fill(m_flag.begin(), m_flag.end(), 0);
4676 
4677     m_core = core;
4678     m_cmDevice = 0;
4679     m_cmDestroy = 0;
4680     m_numFrameActualReturnedByAllocFrames = NumFrameActual;
4681     if (!isAllFramesRequired)
4682         NumFrameActual = req.NumFrameMin; // no need in redundant frames
4683     return MFX_ERR_NONE;
4684 }
4685 
Alloc(VideoCORE * core,mfxFrameAllocRequest & req,mfxFrameSurface1 ** opaqSurf,mfxU32 numOpaqSurf)4686 mfxStatus MfxFrameAllocResponse::Alloc(
4687     VideoCORE *            core,
4688     mfxFrameAllocRequest & req,
4689     mfxFrameSurface1 **    opaqSurf,
4690     mfxU32                 numOpaqSurf)
4691 {
4692     if (m_core || m_cmDevice)
4693         return Error(MFX_ERR_MEMORY_ALLOC);
4694 
4695     req.NumFrameSuggested = req.NumFrameMin; // no need in 2 different NumFrames
4696 
4697     mfxStatus sts = core->AllocFrames(&req, this, opaqSurf, numOpaqSurf);
4698     MFX_CHECK_STS(sts);
4699 
4700     if (NumFrameActual < req.NumFrameMin)
4701         return MFX_ERR_MEMORY_ALLOC;
4702 
4703     m_core = core;
4704     m_cmDevice = 0;
4705     m_cmDestroy = 0;
4706     m_numFrameActualReturnedByAllocFrames = NumFrameActual;
4707     NumFrameActual = req.NumFrameMin; // no need in redundant frames
4708     return MFX_ERR_NONE;
4709 }
4710 
AllocCmBuffers(CmDevice * device,mfxFrameAllocRequest & req)4711 mfxStatus MfxFrameAllocResponse::AllocCmBuffers(
4712     CmDevice *             device,
4713     mfxFrameAllocRequest & req)
4714 {
4715     if (m_core || m_cmDevice)
4716         return Error(MFX_ERR_MEMORY_ALLOC);
4717 
4718     req.NumFrameSuggested = req.NumFrameMin;
4719     mfxU32 size = req.Info.Width * req.Info.Height;
4720 
4721     m_mids.resize(req.NumFrameMin, 0);
4722     m_locked.resize(req.NumFrameMin, 0);
4723     m_flag.resize(req.NumFrameMin, 0);
4724     std::fill(m_flag.begin(), m_flag.end(), 0);
4725     for (int i = 0; i < req.NumFrameMin; i++)
4726         m_mids[i] = CreateBuffer(device, size);
4727 
4728     NumFrameActual = req.NumFrameMin;
4729     mids = &m_mids[0];
4730 
4731     m_core     = 0;
4732     m_cmDevice = device;
4733     m_cmDestroy = &DestroyBuffer;
4734     return MFX_ERR_NONE;
4735 }
4736 
AllocCmSurfaces(CmDevice * device,mfxFrameAllocRequest & req)4737 mfxStatus MfxFrameAllocResponse::AllocCmSurfaces(
4738     CmDevice *             device,
4739     mfxFrameAllocRequest & req)
4740 {
4741     if (m_core || m_cmDevice)
4742         return Error(MFX_ERR_MEMORY_ALLOC);
4743 
4744     req.NumFrameSuggested = req.NumFrameMin;
4745 
4746     m_mids.resize(req.NumFrameMin, 0);
4747     m_locked.resize(req.NumFrameMin, 0);
4748     m_flag.resize(req.NumFrameMin, 0);
4749     std::fill(m_flag.begin(), m_flag.end(), 0);
4750     for (int i = 0; i < req.NumFrameMin; i++)
4751         m_mids[i] = CreateSurface(device, req.Info.Width, req.Info.Height, req.Info.FourCC);
4752 
4753     NumFrameActual = req.NumFrameMin;
4754     mids = &m_mids[0];
4755 
4756     m_core     = 0;
4757     m_cmDevice = device;
4758     m_cmDestroy = &DestroySurface;
4759     return MFX_ERR_NONE;
4760 }
AllocCmSurfacesUP(CmDevice * device,mfxFrameAllocRequest & req)4761 mfxStatus MfxFrameAllocResponse::AllocCmSurfacesUP(
4762     CmDevice *             device,
4763     mfxFrameAllocRequest & req)
4764 {
4765     if (m_core || m_cmDevice)
4766         return Error(MFX_ERR_MEMORY_ALLOC);
4767 
4768     req.NumFrameSuggested = req.NumFrameMin;
4769     mfxU32 size = req.Info.Width * req.Info.Height;
4770 
4771     m_mids.resize(req.NumFrameMin, 0);
4772     m_locked.resize(req.NumFrameMin, 0);
4773     m_sysmems.resize(req.NumFrameMin, 0);
4774     m_flag.resize(req.NumFrameMin, 0);
4775     std::fill(m_flag.begin(), m_flag.end(), 0);
4776 
4777     for (int i = 0; i < req.NumFrameMin; i++) {
4778         m_sysmems[i] = CM_ALIGNED_MALLOC(size, 0x1000);
4779         m_mids[i] = CreateSurface(device, m_sysmems[i], req.Info.Width, req.Info.Height, req.Info.FourCC);
4780     }
4781 
4782     NumFrameActual = req.NumFrameMin;
4783     mids = &m_mids[0];
4784 
4785     m_core = 0;
4786     m_cmDevice = device;
4787     m_cmDestroy = &DestroySurface2DUP;
4788     return MFX_ERR_NONE;
4789 }
AllocFrames(VideoCORE * core,mfxFrameAllocRequest & req)4790 mfxStatus MfxFrameAllocResponse::AllocFrames(
4791     VideoCORE *            core,
4792     mfxFrameAllocRequest & req)
4793 {
4794     if (m_core || m_cmDevice)
4795         return Error(MFX_ERR_MEMORY_ALLOC);
4796 
4797     req.NumFrameSuggested = req.NumFrameMin;
4798     mfxU32 size = req.Info.Width * req.Info.Height;
4799 
4800     m_locked.resize(req.NumFrameMin, 0);
4801     m_sysmems.resize(req.NumFrameMin, 0);
4802     m_flag.resize(req.NumFrameMin, 0);
4803     std::fill(m_flag.begin(), m_flag.end(), 0);
4804 
4805     for (int i = 0; i < req.NumFrameMin; i++) {
4806         m_sysmems[i] = CM_ALIGNED_MALLOC(size, 0x1000);
4807     }
4808 
4809     NumFrameActual = req.NumFrameMin;
4810 
4811     m_core = core;
4812     m_cmDestroy = 0;
4813     return MFX_ERR_NONE;
4814 }
UpdateResourcePointers(mfxU32 idxScd,void * memY,void * gpuSurf)4815 mfxStatus MfxFrameAllocResponse::UpdateResourcePointers(mfxU32 idxScd, void * memY, void * gpuSurf)
4816 {
4817     if (m_mids.size() < idxScd || m_sysmems.size() < idxScd)
4818         return MFX_ERR_NOT_ENOUGH_BUFFER;
4819     m_mids[idxScd] = gpuSurf;
4820     m_sysmems[idxScd] = memY;
4821     return MFX_ERR_NONE;
4822 }
4823 
AllocCmBuffersUp(CmDevice * device,mfxFrameAllocRequest & req)4824 mfxStatus MfxFrameAllocResponse::AllocCmBuffersUp(
4825     CmDevice *             device,
4826     mfxFrameAllocRequest & req)
4827 {
4828     if (m_core || m_cmDevice)
4829         return Error(MFX_ERR_MEMORY_ALLOC);
4830 
4831     req.NumFrameSuggested = req.NumFrameMin;
4832     mfxU32 size = req.Info.Width * req.Info.Height;
4833 
4834     m_mids.resize(req.NumFrameMin, 0);
4835     m_locked.resize(req.NumFrameMin, 0);
4836     m_sysmems.resize(req.NumFrameMin, 0);
4837     m_flag.resize(req.NumFrameMin, 0);
4838     std::fill(m_flag.begin(), m_flag.end(), 0);
4839 
4840     for (int i = 0; i < req.NumFrameMin; i++)
4841     {
4842         m_sysmems[i] = CM_ALIGNED_MALLOC(size, 0x1000);
4843         m_mids[i] = CreateBuffer(device, size, m_sysmems[i]);
4844     }
4845 
4846     NumFrameActual = req.NumFrameMin;
4847     mids = &m_mids[0];
4848 
4849     m_core     = 0;
4850     m_cmDevice = device;
4851     m_cmDestroy = &DestroyBufferUp;
4852     return MFX_ERR_NONE;
4853 }
4854 
Lock(mfxU32 idx)4855 mfxU32 MfxFrameAllocResponse::Lock(mfxU32 idx)
4856 {
4857     if (idx >= m_locked.size())
4858         return 0;
4859     assert(m_locked[idx] < 0xffffffff);
4860     return ++m_locked[idx];
4861 }
ClearFlag(mfxU32 idx)4862 void MfxFrameAllocResponse::ClearFlag(mfxU32 idx)
4863 {
4864     assert(idx < m_flag.size());
4865     if (idx < m_flag.size())
4866     {
4867         m_flag[idx] = 0;
4868     }
4869 }
SetFlag(mfxU32 idx,mfxU32 flag)4870 void MfxFrameAllocResponse::SetFlag(mfxU32 idx, mfxU32 flag)
4871 {
4872     assert(idx < m_flag.size());
4873     if (idx < m_flag.size())
4874     {
4875         m_flag[idx] |= flag;
4876     }
4877 }
GetFlag(mfxU32 idx) const4878 mfxU32 MfxFrameAllocResponse::GetFlag(mfxU32 idx) const
4879 {
4880     assert(idx < m_flag.size());
4881     if (idx < m_flag.size())
4882     {
4883         return m_flag[idx];
4884     }
4885     return 0;
4886 }
4887 
Unlock()4888 void MfxFrameAllocResponse::Unlock()
4889 {
4890     std::fill(m_locked.begin(), m_locked.end(), 0);
4891 }
4892 
Unlock(mfxU32 idx)4893 mfxU32 MfxFrameAllocResponse::Unlock(mfxU32 idx)
4894 {
4895     if (idx >= m_locked.size())
4896         return mfxU32(-1);
4897     assert(m_locked[idx] > 0);
4898     return --m_locked[idx];
4899 }
4900 
Locked(mfxU32 idx) const4901 mfxU32 MfxFrameAllocResponse::Locked(mfxU32 idx) const
4902 {
4903     return (idx < m_locked.size()) ? m_locked[idx] : 1;
4904 }
4905 
GetSysmemBuffer(mfxU32 idx)4906 void * MfxFrameAllocResponse::GetSysmemBuffer(mfxU32 idx)
4907 {
4908     return (idx < m_sysmems.size()) ? m_sysmems[idx] : 0;
4909 }
4910 
FindFreeResourceIndex(MfxFrameAllocResponse const & pool,mfxU32 startingFrom)4911 mfxU32 MfxHwH264Encode::FindFreeResourceIndex(
4912     MfxFrameAllocResponse const & pool,
4913     mfxU32                        startingFrom)
4914 {
4915     for (mfxU32 i = startingFrom; i < pool.NumFrameActual; i++)
4916         if (pool.Locked(i) == 0)
4917             return i;
4918     return NO_INDEX;
4919 }
4920 
AcquireResource(MfxFrameAllocResponse & pool,mfxU32 index)4921 mfxMemId MfxHwH264Encode::AcquireResource(
4922     MfxFrameAllocResponse & pool,
4923     mfxU32                  index)
4924 {
4925     if (index > pool.NumFrameActual)
4926         return MID_INVALID;
4927     pool.Lock(index);
4928     pool.ClearFlag(index);
4929     return pool.mids[index];
4930 }
4931 
AcquireResource(MfxFrameAllocResponse & pool)4932 mfxMemId MfxHwH264Encode::AcquireResource(
4933     MfxFrameAllocResponse & pool)
4934 {
4935     return AcquireResource(pool, FindFreeResourceIndex(pool));
4936 }
4937 
AcquireResourceUp(MfxFrameAllocResponse & pool,mfxU32 index)4938 mfxHDLPair MfxHwH264Encode::AcquireResourceUp(
4939     MfxFrameAllocResponse & pool,
4940     mfxU32                  index)
4941 {
4942     mfxHDLPair p = { 0, 0 };
4943     if (index > pool.NumFrameActual)
4944         return p;
4945     pool.Lock(index);
4946     pool.ClearFlag(index);
4947     p.first  = pool.mids[index];
4948     p.second = pool.GetSysmemBuffer(index);
4949     return p;
4950 }
4951 
AcquireResourceUp(MfxFrameAllocResponse & pool)4952 mfxHDLPair MfxHwH264Encode::AcquireResourceUp(
4953     MfxFrameAllocResponse & pool)
4954 {
4955     return AcquireResourceUp(pool, FindFreeResourceIndex(pool));
4956 }
4957 
ReleaseResource(MfxFrameAllocResponse & pool,mfxMemId mid)4958 void MfxHwH264Encode::ReleaseResource(
4959     MfxFrameAllocResponse & pool,
4960     mfxMemId                mid)
4961 {
4962     for (mfxU32 i = 0; i < pool.NumFrameActual; i++)
4963     {
4964         if (pool.mids[i] == mid)
4965         {
4966             pool.Unlock(i);
4967             break;
4968         }
4969     }
4970 }
4971 
4972 
CheckEncodeFrameParam(MfxVideoParam const & video,mfxEncodeCtrl * ctrl,mfxFrameSurface1 * surface,mfxBitstream * bs,bool isExternalFrameAllocator,MFX_ENCODE_CAPS const & caps,eMFXHWType hwType)4973 mfxStatus MfxHwH264Encode::CheckEncodeFrameParam(
4974     MfxVideoParam const &     video,
4975     mfxEncodeCtrl *           ctrl,
4976     mfxFrameSurface1 *        surface,
4977     mfxBitstream *            bs,
4978     bool                      isExternalFrameAllocator,
4979     MFX_ENCODE_CAPS const &   caps,
4980     eMFXHWType                hwType)
4981 {
4982     mfxStatus checkSts = MFX_ERR_NONE;
4983     MFX_CHECK_NULL_PTR1(bs);
4984 
4985     // arbitrary reference field polarity is supported starting BDW
4986     bool isHwSupportArbRef =  (hwType >= MFX_HW_BDW);
4987 
4988     if(IsOn(video.mfx.LowPower) && ctrl){
4989         //LowPower can't encode low QPs
4990         if(ctrl->QP != 0 && ctrl->QP < 10 ){
4991             ctrl->QP = 10;
4992             checkSts = MFX_WRN_INCOMPATIBLE_VIDEO_PARAM;
4993         }
4994     }
4995 
4996     if (video.Protected == 0)
4997     {
4998         mfxU32 bufferSizeInKB = 0;
4999         if (video.calcParam.cqpHrdMode)
5000             bufferSizeInKB = video.calcParam.decorativeHrdParam.bufferSizeInKB;
5001         else if (IsMvcProfile(video.mfx.CodecProfile))
5002             bufferSizeInKB = video.calcParam.mvcPerViewPar.bufferSizeInKB;
5003         else
5004             bufferSizeInKB = video.calcParam.bufferSizeInKB;
5005 
5006         MFX_CHECK(bs->DataOffset <= bs->MaxLength, MFX_ERR_UNDEFINED_BEHAVIOR);
5007         MFX_CHECK(bs->DataOffset + bs->DataLength + bufferSizeInKB * 1000u <= bs->MaxLength,
5008             MFX_ERR_NOT_ENOUGH_BUFFER);
5009         MFX_CHECK_NULL_PTR1(bs->Data);
5010     }
5011 
5012     if (video.mfx.EncodedOrder == 1 &&
5013         video.mfx.RateControlMethod != MFX_RATECONTROL_LA_EXT)
5014     {
5015         MFX_CHECK(surface != 0, MFX_ERR_MORE_DATA);
5016         MFX_CHECK_NULL_PTR1(ctrl);
5017 
5018         mfxU16 firstFieldType  = ctrl->FrameType & 0x07;
5019         mfxU16 secondFieldType = (ctrl->FrameType >> 8) & 0x07;
5020 
5021         // check frame type
5022         MFX_CHECK(
5023             firstFieldType == MFX_FRAMETYPE_I ||
5024             firstFieldType == MFX_FRAMETYPE_P ||
5025             firstFieldType == MFX_FRAMETYPE_B,
5026             MFX_ERR_INVALID_VIDEO_PARAM);
5027 
5028         if (secondFieldType)
5029         {
5030             MFX_CHECK(
5031                 firstFieldType == MFX_FRAMETYPE_I ||
5032                 firstFieldType == MFX_FRAMETYPE_P ||
5033                 firstFieldType == MFX_FRAMETYPE_B,
5034                 MFX_ERR_INVALID_VIDEO_PARAM);
5035 
5036             // check compatibility of fields types
5037             MFX_CHECK(
5038                 firstFieldType == secondFieldType ||
5039                 (firstFieldType == MFX_FRAMETYPE_I && secondFieldType == MFX_FRAMETYPE_P) ||
5040                 (firstFieldType == MFX_FRAMETYPE_P && secondFieldType == MFX_FRAMETYPE_I && isHwSupportArbRef),
5041                 MFX_ERR_INVALID_VIDEO_PARAM);
5042         }
5043     }
5044     else if (video.mfx.EncodedOrder != 1)
5045     {
5046         if (ctrl != 0 && ctrl->FrameType)
5047         {
5048             // check FrameType for forced key-frame generation
5049             mfxU16 type = ctrl->FrameType & (MFX_FRAMETYPE_IPB | MFX_FRAMETYPE_xIPB);
5050             MFX_CHECK(
5051                 type   == (MFX_FRAMETYPE_I)                     ||
5052                 type   == (MFX_FRAMETYPE_I | MFX_FRAMETYPE_xI)  ||
5053                 type   == (MFX_FRAMETYPE_I | MFX_FRAMETYPE_xP)  ||
5054                 ((type == (MFX_FRAMETYPE_P | MFX_FRAMETYPE_xI)) && isHwSupportArbRef),
5055                 MFX_ERR_INVALID_VIDEO_PARAM);
5056         }
5057     }
5058 
5059     if (surface != 0)
5060     {
5061         // Check Runtime extension buffers if not buffered frames processing
5062         if (ctrl != 0 && ctrl->NumExtParam)
5063         {
5064             checkSts = CheckRunTimeExtBuffers(video, ctrl, surface, bs, caps, hwType);
5065             if (checkSts < MFX_ERR_NONE) { return checkSts; }
5066         }
5067         else
5068         {
5069             // FEI frame control buffer is mandatory for encoding
5070             mfxExtFeiParam const & feiParam = GetExtBufferRef(video);
5071             // FEI encoding without any extension buffers provided is impossible
5072             MFX_CHECK(feiParam.Func != MFX_FEI_FUNCTION_ENCODE, MFX_ERR_UNDEFINED_BEHAVIOR);
5073         }
5074 
5075         mfxExtOpaqueSurfaceAlloc & extOpaq = GetExtBufferRef(video);
5076         bool opaq = extOpaq.In.Surfaces != 0;
5077 
5078         MFX_CHECK((surface->Data.Y == 0) == (surface->Data.UV == 0), MFX_ERR_UNDEFINED_BEHAVIOR);
5079         MFX_CHECK(surface->Data.Pitch < 0x8000, MFX_ERR_UNDEFINED_BEHAVIOR);
5080         MFX_CHECK(surface->Data.Y != 0 || isExternalFrameAllocator || opaq, MFX_ERR_UNDEFINED_BEHAVIOR);
5081         MFX_CHECK((surface->Data.Y == 0 && surface->Data.MemId == 0) || !opaq, MFX_ERR_UNDEFINED_BEHAVIOR);
5082         MFX_CHECK(surface->Info.Width >= video.mfx.FrameInfo.Width, MFX_ERR_INVALID_VIDEO_PARAM);
5083         MFX_CHECK(surface->Info.Height >= video.mfx.FrameInfo.Height, MFX_ERR_INVALID_VIDEO_PARAM);
5084 
5085         mfxStatus sts = CheckRunTimePicStruct(surface->Info.PicStruct, video.mfx.FrameInfo.PicStruct);
5086         if (sts < MFX_ERR_NONE)
5087             return sts;
5088         else if (sts > MFX_ERR_NONE)
5089             checkSts = MFX_WRN_INCOMPATIBLE_VIDEO_PARAM;
5090 
5091         if (video.calcParam.cqpHrdMode)
5092         {
5093             MFX_CHECK_NULL_PTR1(ctrl);
5094             MFX_CHECK(ctrl->QP > 0 && ctrl->QP <= 51, MFX_ERR_INVALID_VIDEO_PARAM);
5095         }
5096     }
5097 
5098     if (ctrl != 0 && ctrl->NumPayload > 0)
5099     {
5100         MFX_CHECK_NULL_PTR1(ctrl->Payload);
5101 
5102         mfxStatus sts = CheckPayloads(ctrl->Payload, ctrl->NumPayload);
5103         MFX_CHECK_STS(sts);
5104     }
5105 
5106     return checkSts;
5107 }
5108 
CheckBeforeCopy(mfxExtMVCSeqDesc & dst,mfxExtMVCSeqDesc const & src)5109 mfxStatus MfxHwH264Encode::CheckBeforeCopy(mfxExtMVCSeqDesc & dst, mfxExtMVCSeqDesc const & src)
5110 {
5111     if (dst.NumViewAlloc   < src.NumView   ||
5112         dst.NumViewIdAlloc < src.NumViewId ||
5113         dst.NumOPAlloc     < src.NumOP)
5114     {
5115         dst.NumView   = src.NumView;
5116         dst.NumViewId = src.NumViewId;
5117         dst.NumOP     = src.NumOP;
5118         return MFX_ERR_NOT_ENOUGH_BUFFER;
5119     }
5120 
5121     if (dst.View == 0 || dst.ViewId == 0 || dst.OP == 0)
5122     {
5123         return MFX_ERR_NULL_PTR;
5124     }
5125 
5126     return MFX_ERR_NONE;
5127 }
5128 
CheckBeforeCopyQueryLike(mfxExtMVCSeqDesc & dst,mfxExtMVCSeqDesc const & src)5129 mfxStatus MfxHwH264Encode::CheckBeforeCopyQueryLike(mfxExtMVCSeqDesc & dst, mfxExtMVCSeqDesc const & src)
5130 {
5131     if ((dst.View   && dst.NumViewAlloc   < src.NumView)   ||
5132         (dst.ViewId && dst.NumViewIdAlloc < src.NumViewId) ||
5133         (dst.OP     && dst.NumOPAlloc     < src.NumOP))
5134     {
5135         dst.NumView   = src.NumView;
5136         dst.NumViewId = src.NumViewId;
5137         dst.NumOP     = src.NumOP;
5138         return MFX_ERR_INVALID_VIDEO_PARAM;
5139     }
5140 
5141     return MFX_ERR_NONE;
5142 }
5143 
Copy(mfxExtMVCSeqDesc & dst,mfxExtMVCSeqDesc const & src)5144 void MfxHwH264Encode::Copy(mfxExtMVCSeqDesc & dst, mfxExtMVCSeqDesc const & src)
5145 {
5146     if (dst.View)
5147     {
5148         dst.NumView = src.NumView;
5149         std::copy(src.View,   src.View   + src.NumView,   dst.View);
5150     }
5151 
5152     if (dst.ViewId)
5153     {
5154         dst.NumViewId = src.NumViewId;
5155         std::copy(src.ViewId, src.ViewId + src.NumViewId, dst.ViewId);
5156     }
5157 
5158     if (dst.OP)
5159     {
5160         dst.NumOP = src.NumOP;
5161         for (mfxU32 i = 0; i < dst.NumOP; i++)
5162         {
5163             dst.OP[i].TemporalId     = src.OP[i].TemporalId;
5164             dst.OP[i].LevelIdc       = src.OP[i].LevelIdc;
5165             dst.OP[i].NumViews       = src.OP[i].NumViews;
5166             dst.OP[i].NumTargetViews = src.OP[i].NumTargetViews;
5167             dst.OP[i].TargetViewId   = &dst.ViewId[src.OP[i].TargetViewId - src.ViewId];
5168         }
5169     }
5170 
5171     dst.NumRefsTotal = src.NumRefsTotal;
5172 }
5173 
FastCopyBufferVid2Sys(void * dstSys,void const * srcVid,mfxI32 bytes)5174 void MfxHwH264Encode::FastCopyBufferVid2Sys(void * dstSys, void const * srcVid, mfxI32 bytes)
5175 {
5176     MFX_AUTO_LTRACE(MFX_TRACE_LEVEL_INTERNAL, "Surface copy (bitstream)");
5177 
5178     assert(dstSys != 0);
5179     assert(srcVid != 0);
5180 
5181     mfxSize roi = { bytes, 1 };
5182     mfxStatus sts = FastCopy::Copy((uint8_t *)dstSys, bytes, (uint8_t *)srcVid, bytes, roi, COPY_VIDEO_TO_SYS);
5183     assert(sts == MFX_ERR_NONE);
5184     (void)sts;
5185 }
5186 
FastCopyBufferSys2Vid(void * dstVid,void const * srcSys,mfxI32 bytes)5187 void MfxHwH264Encode::FastCopyBufferSys2Vid(void * dstVid, void const * srcSys, mfxI32 bytes)
5188 {
5189     assert(dstVid != 0);
5190     assert(srcSys != 0);
5191 
5192     mfxSize roi = { bytes, 1 };
5193     mfxStatus sts = FastCopy::Copy((uint8_t *)dstVid, bytes, (uint8_t *)srcSys, bytes, roi, COPY_SYS_TO_VIDEO);
5194     assert(sts == MFX_ERR_NONE);
5195     (void)sts;
5196 }
5197 
// Resizes the pool to 'size' tasks and restarts the cycle at the first task.
void CyclicTaskPool::Init(mfxU32 size)
{
    m_pool.resize(size);
    // resize() may invalidate iterators, so m_next is re-seated afterwards
    m_next = m_pool.begin();
}
5203 
GetFreeTask()5204 DdiTask2ndField * CyclicTaskPool::GetFreeTask()
5205 {
5206     if (m_pool.empty())
5207         return 0;
5208     if (m_next == m_pool.end())
5209         m_next = m_pool.begin();
5210     return &*(m_next++);
5211 }
5212 
SkipStartCode(mfxU8 const * begin,mfxU8 const * end)5213 mfxU8 const * MfxHwH264Encode::SkipStartCode(mfxU8 const * begin, mfxU8 const * end)
5214 {
5215     mfxU32 threeBytePrefix = (end - begin < 3)
5216         ? 0xffffffff
5217         : (begin[0] << 16) | (begin[1] << 8) | (begin[2]);
5218 
5219     if (threeBytePrefix == 1)
5220         return begin + 3;
5221     else if (threeBytePrefix == 0 && end - begin > 3 && begin[3] == 1)
5222         return begin + 4;
5223     else
5224         return begin;
5225 }
5226 
SkipStartCode(mfxU8 * begin,mfxU8 * end)5227 mfxU8 * MfxHwH264Encode::SkipStartCode(mfxU8 * begin, mfxU8 * end)
5228 {
5229     return const_cast<mfxU8 *>(SkipStartCode(const_cast<const mfxU8 *>(begin), const_cast<const mfxU8 *>(end)));
5230 }
5231 
// Builds the sequence of reference list modification commands that turns
// 'initList' into 'modList'.
//
//   dpb        - DPB frames; list entries are used as indices into it after
//                masking with 0x7f
//   recons     - forwarded to GetPicNum*/GetLongTermPicNum*
//   initList   - initial list; taken by value because it is updated locally
//                after every emitted command
//   modList    - desired list; must have the same size as initList
//   curViewIdx - view index of the current picture; entries from other views
//                are handled as inter-view references
//   curPicNum  - starting value of the picNum predictor
//   optimize   - when true, stop as soon as the working list equals modList
//
// Returns the accumulated commands (possibly empty). If an entry cannot be
// expressed relative to the current predictor, the function asserts and
// returns the commands collected so far.
ArrayRefListMod MfxHwH264Encode::CreateRefListMod(
    ArrayDpbFrame const &            dpb,
    std::vector<Reconstruct> const & recons,
    ArrayU8x33                       initList,
    ArrayU8x33 const &               modList,
    mfxU32                           curViewIdx,
    mfxI32                           curPicNum,
    bool                             optimize)
{
    assert(initList.Size() == modList.Size());

    ArrayRefListMod refListMod;

    // predictors against which the next command is coded
    mfxI32 picNumPred     = curPicNum;
    mfxI32 picViewIdxPred = -1;

    for (mfxU32 refIdx = 0; refIdx < modList.Size(); refIdx++)
    {
        // initList changes on every iteration, so this is re-evaluated each time
        if (optimize && initList == modList)
            return refListMod;

        if (dpb[modList[refIdx] & 0x7f].m_viewIdx != curViewIdx)
        {
            // inter-view reference reordering
            mfxI32 viewIdx = dpb[modList[refIdx] & 0x7f].m_viewIdx;

            if (viewIdx > picViewIdxPred)
            {
                refListMod.PushBack(RefListMod(RPLM_INTERVIEW_ADD, mfxU16(viewIdx - picViewIdxPred - 1)));
            }
            else if (viewIdx < picViewIdxPred)
            {
                refListMod.PushBack(RefListMod(RPLM_INTERVIEW_SUB, mfxU16(picViewIdxPred - viewIdx - 1)));
            }
            else
            {
                assert(!"can't reorder ref list");
                break;
            }

            // shift entries [refIdx, Size()) up by one and insert the wanted
            // entry at refIdx, then squeeze out the entries of the same view.
            // NOTE(review): initList[initList.Size()] is written and read here;
            // presumably ArrayU8x33 has a spare element for this - confirm.
            for (mfxU32 cIdx = initList.Size(); cIdx > refIdx; cIdx--)
                initList[cIdx] = initList[cIdx - 1];
            initList[refIdx] = modList[refIdx];
            mfxU32 nIdx = refIdx + 1;
            for (mfxU32 cIdx = refIdx + 1; cIdx <= initList.Size(); cIdx++)
                if (dpb[initList[cIdx] & 0x7f].m_viewIdx != mfxU32(viewIdx))
                    initList[nIdx++] = initList[cIdx];

            picViewIdxPred = viewIdx;
        }
        else if (dpb[modList[refIdx] & 0x7f].m_longterm)
        {
            // long term reference reordering
            mfxU8 longTermPicNum = GetLongTermPicNum(recons, dpb, modList[refIdx]);

            refListMod.PushBack(RefListMod(RPLM_LT_PICNUM, longTermPicNum));

            // same insert-and-squeeze step as above; the inserted picture is
            // recognized by its long term pic num within the current view
            for (mfxU32 cIdx = initList.Size(); cIdx > refIdx; cIdx--)
                initList[cIdx] = initList[cIdx - 1];
            initList[refIdx] = modList[refIdx];
            mfxU32 nIdx = refIdx + 1;
            for (mfxU32 cIdx = refIdx + 1; cIdx <= initList.Size(); cIdx++)
                if (GetLongTermPicNumF(recons, dpb, initList[cIdx]) != longTermPicNum ||
                    dpb[initList[cIdx] & 0x7f].m_viewIdx != curViewIdx)
                    initList[nIdx++] = initList[cIdx];
        }
        else
        {
            // short term reference reordering
            mfxI32 picNum = GetPicNum(recons, dpb, modList[refIdx]);

            if (picNum > picNumPred)
            {
                mfxU16 absDiffPicNum = mfxU16(picNum - picNumPred);
                refListMod.PushBack(RefListMod(RPLM_ST_PICNUM_ADD, absDiffPicNum - 1));
            }
            else if (picNum < picNumPred)
            {
                mfxU16 absDiffPicNum = mfxU16(picNumPred - picNum);
                refListMod.PushBack(RefListMod(RPLM_ST_PICNUM_SUB, absDiffPicNum - 1));
            }
            else
            {
                assert(!"can't reorder ref list");
                break;
            }

            // same insert-and-squeeze step as above; the inserted picture is
            // recognized by its pic num within the current view
            for (mfxU32 cIdx = initList.Size(); cIdx > refIdx; cIdx--)
                initList[cIdx] = initList[cIdx - 1];
            initList[refIdx] = modList[refIdx];
            mfxU32 nIdx = refIdx + 1;
            for (mfxU32 cIdx = refIdx + 1; cIdx <= initList.Size(); cIdx++)
                if (GetPicNumF(recons, dpb, initList[cIdx]) != picNum ||
                    dpb[initList[cIdx] & 0x7f].m_viewIdx != curViewIdx)
                    initList[nIdx++] = initList[cIdx];

            picNumPred = picNum;
        }
    }

    return refListMod;
}
5334 
CheckedMFX_INTERNAL_CPY(mfxU8 * dbegin,mfxU8 * dend,mfxU8 const * sbegin,mfxU8 const * send)5335 mfxU8 * MfxHwH264Encode::CheckedMFX_INTERNAL_CPY(
5336     mfxU8 *       dbegin,
5337     mfxU8 *       dend,
5338     mfxU8 const * sbegin,
5339     mfxU8 const * send)
5340 {
5341     if (dend - dbegin < send - sbegin)
5342     {
5343         assert(0);
5344         throw EndOfBuffer();
5345     }
5346 
5347     MFX_INTERNAL_CPY(dbegin, sbegin, (uint32_t)(send - sbegin));
5348     return dbegin + (send - sbegin);
5349 }
5350 
5351 
CheckedMemset(mfxU8 * dbegin,mfxU8 * dend,mfxU8 value,mfxU32 size)5352 mfxU8 * MfxHwH264Encode::CheckedMemset(
5353     mfxU8 * dbegin,
5354     mfxU8 * dend,
5355     mfxU8   value,
5356     mfxU32  size)
5357 {
5358     if (dbegin + size > dend)
5359     {
5360         assert(0);
5361         throw EndOfBuffer();
5362     }
5363 
5364     memset(dbegin, value, size);
5365     return dbegin + size;
5366 }
5367 
5368 
ReadRefPicListModification(InputBitstream & reader)5369 void MfxHwH264Encode::ReadRefPicListModification(
5370     InputBitstream & reader)
5371 {
5372     if (reader.GetBit())                    // ref_pic_list_modification_flag_l0
5373     {
5374         for (;;)
5375         {
5376             mfxU32 tmp = reader.GetUe();    // modification_of_pic_nums_idc
5377 
5378             if (tmp == RPLM_END)
5379                 break;
5380 
5381             if (tmp > RPLM_INTERVIEW_ADD)
5382             {
5383                 assert(!"bad bitstream");
5384                 throw std::logic_error(": bad bitstream");
5385             }
5386 
5387             reader.GetUe();                 // abs_diff_pic_num_minus1 or
5388                                             // long_term_pic_num or
5389                                             // abs_diff_view_idx_minus1
5390         }
5391     }
5392 }
5393 
ReadDecRefPicMarking(InputBitstream & reader,bool idrPicFlag)5394 void MfxHwH264Encode::ReadDecRefPicMarking(
5395     InputBitstream & reader,
5396     bool             idrPicFlag)
5397 {
5398     if (idrPicFlag)
5399     {
5400         reader.GetBit();                    // no_output_of_prior_pics_flag
5401         reader.GetBit();                    // long_term_reference_flag
5402     }
5403     else
5404     {
5405         mfxU32 tmp = reader.GetBit();       // adaptive_ref_pic_marking_mode_flag
5406         assert(tmp == 0 && "adaptive_ref_pic_marking_mode_flag should be 0");
5407         (void)tmp;
5408     }
5409 }
5410 
WriteRefPicListModification(OutputBitstream & writer,ArrayRefListMod const & refListMod)5411 void WriteRefPicListModification(
5412     OutputBitstream &       writer,
5413     ArrayRefListMod const & refListMod)
5414 {
5415     writer.PutBit(refListMod.Size() > 0);       // ref_pic_list_modification_flag_l0
5416     if (refListMod.Size() > 0)
5417     {
5418         for (mfxU32 i = 0; i < refListMod.Size(); i++)
5419         {
5420             writer.PutUe(refListMod[i].m_idc);  // modification_of_pic_nums_idc
5421             writer.PutUe(refListMod[i].m_diff); // abs_diff_pic_num_minus1 or
5422                                                 // long_term_pic_num or
5423                                                 // abs_diff_view_idx_minus1
5424         }
5425 
5426         writer.PutUe(RPLM_END);
5427     }
5428 }
5429 
WriteDecRefPicMarking(OutputBitstream & writer,DecRefPicMarkingInfo const & marking,bool idrPicFlag)5430 void MfxHwH264Encode::WriteDecRefPicMarking(
5431     OutputBitstream &            writer,
5432     DecRefPicMarkingInfo const & marking,
5433     bool                         idrPicFlag)
5434 {
5435     if (idrPicFlag)
5436     {
5437         writer.PutBit(marking.no_output_of_prior_pics_flag);    // no_output_of_prior_pics_flag
5438         writer.PutBit(marking.long_term_reference_flag);        // long_term_reference_flag
5439     }
5440     else
5441     {
5442         writer.PutBit(marking.mmco.Size() > 0);                 // adaptive_ref_pic_marking_mode_flag
5443         if (marking.mmco.Size())
5444         {
5445             for (mfxU32 i = 0; i < marking.mmco.Size(); i++)
5446             {
5447                 writer.PutUe(marking.mmco[i]);                  // memory_management_control_operation
5448                 writer.PutUe(marking.value[2 * i]);             // difference_of_pic_nums_minus1 or
5449                                                                 // long_term_pic_num or
5450                                                                 // long_term_frame_idx or
5451                                                                 // max_long_term_frame_idx_plus1
5452                 if (marking.mmco[i] == MMCO_ST_TO_LT)
5453                     writer.PutUe(marking.value[2 * i + 1]);     // long_term_frame_idx
5454             }
5455 
5456             writer.PutUe(MMCO_END);
5457         }
5458     }
5459 }
5460 
5461 
RePackSlice(mfxU8 * dbegin,mfxU8 * dend,mfxU8 * sbegin,mfxU8 * send,MfxVideoParam const & par,DdiTask const & task,mfxU32 fieldId)5462 mfxU8 * MfxHwH264Encode::RePackSlice(
5463     mfxU8 *               dbegin,
5464     mfxU8 *               dend,
5465     mfxU8 *               sbegin,
5466     mfxU8 *               send,
5467     MfxVideoParam const & par,
5468     DdiTask const &       task,
5469     mfxU32                fieldId)
5470 {
5471     mfxExtSpsHeader & extSps = GetExtBufferRef(par);
5472     mfxExtPpsHeader & extPps = GetExtBufferRef(par);
5473 
5474     mfxU32 num_ref_idx_l0_active_minus1     = 0;
5475     mfxU32 num_ref_idx_l1_active_minus1     = 0;
5476 
5477     mfxU32 sliceType    = 0;
5478     mfxU32 fieldPicFlag = 0;
5479 
5480     mfxU32 tmp = 0;
5481 
5482     if (extPps.entropyCodingModeFlag == 0)
5483     {
5484         // remove start code emulation prevention bytes when doing full repack for CAVLC
5485         mfxU32 zeroCount = 0;
5486         mfxU8 * write = sbegin;
5487         for (mfxU8 * read = write; read != send; ++read)
5488         {
5489             if (*read == 0x03 && zeroCount >= 2 && read + 1 != send && (*(read + 1) & 0xfc) == 0)
5490             {
5491                 // skip start code emulation prevention byte
5492                 zeroCount = 0; // drop zero count
5493             }
5494             else
5495             {
5496                 *(write++) = *read;
5497                 zeroCount = (*read == 0) ? zeroCount + 1 : 0;
5498             }
5499         }
5500     }
5501 
5502     InputBitstream  reader(sbegin, send, true, extPps.entropyCodingModeFlag == 1);
5503     OutputBitstream writer(dbegin, dend);
5504 
5505     writer.PutUe(reader.GetUe());                               // first_mb_in_slice
5506     writer.PutUe(sliceType = reader.GetUe());                   // slice_type
5507     writer.PutUe(reader.GetUe());                               // pic_parameter_set_id
5508 
5509     mfxU32 log2MaxFrameNum = extSps.log2MaxFrameNumMinus4 + 4;
5510     writer.PutBits(reader.GetBits(log2MaxFrameNum), log2MaxFrameNum);
5511 
5512     if (!extSps.frameMbsOnlyFlag)
5513     {
5514         writer.PutBit(fieldPicFlag = reader.GetBit());          // field_pic_flag
5515         if (fieldPicFlag)
5516             writer.PutBit(reader.GetBit());                     // bottom_field_flag
5517     }
5518 
5519     if (task.m_type[fieldId] & MFX_FRAMETYPE_IDR)
5520         writer.PutUe(reader.GetUe());                           // idr_pic_id
5521 
5522     if (extSps.picOrderCntType == 0)
5523     {
5524         mfxU32 log2MaxPicOrderCntLsb = extSps.log2MaxPicOrderCntLsbMinus4 + 4;
5525         writer.PutBits(reader.GetBits(log2MaxPicOrderCntLsb), log2MaxPicOrderCntLsb);
5526     }
5527 
5528     if (sliceType % 5 == 1)
5529         writer.PutBit(reader.GetBit());                         // direct_spatial_mv_pred_flag
5530 
5531     if (sliceType % 5 == 0 || sliceType % 5 == 1)
5532     {
5533         writer.PutBit(tmp = reader.GetBit());                   // num_ref_idx_active_override_flag
5534         if (tmp)
5535         {
5536             num_ref_idx_l0_active_minus1 = reader.GetUe();
5537             writer.PutUe(num_ref_idx_l0_active_minus1);                       // num_ref_idx_l0_active_minus1
5538             if (sliceType % 5 == 1)
5539             {
5540                 num_ref_idx_l1_active_minus1 = reader.GetUe();
5541                 writer.PutUe(num_ref_idx_l1_active_minus1);                   // num_ref_idx_l1_active_minus1
5542             }
5543         }
5544         else
5545         {
5546             num_ref_idx_l0_active_minus1 = extPps.numRefIdxL0DefaultActiveMinus1;
5547             num_ref_idx_l1_active_minus1 = extPps.numRefIdxL1DefaultActiveMinus1;
5548         }
5549     }
5550 
5551     if (sliceType % 5 != 2 && sliceType % 5 != 4)
5552     {
5553         ReadRefPicListModification(reader);
5554         // align size of ref pic list modification with num_ref_idx_l0_active_minus1 which is written to bitstream
5555         if (task.m_refPicList0Mod[fieldId].Size() > num_ref_idx_l0_active_minus1 + 1)
5556         {
5557             ArrayRefListMod refPicListMod = task.m_refPicList0Mod[fieldId];
5558             refPicListMod.Resize(num_ref_idx_l0_active_minus1 + 1);
5559             WriteRefPicListModification(writer, refPicListMod);
5560         }
5561         else
5562             WriteRefPicListModification(writer, task.m_refPicList0Mod[fieldId]);
5563     }
5564 
5565     if (sliceType % 5 == 1)
5566     {
5567         ReadRefPicListModification(reader);
5568         if (task.m_refPicList1Mod[fieldId].Size() > num_ref_idx_l1_active_minus1 + 1)
5569         {
5570             ArrayRefListMod refPicListMod = task.m_refPicList1Mod[fieldId];
5571             refPicListMod.Resize(num_ref_idx_l1_active_minus1 + 1);
5572             WriteRefPicListModification(writer, refPicListMod);
5573         }
5574         else
5575             WriteRefPicListModification(writer, task.m_refPicList1Mod[fieldId]);
5576     }
5577 
5578     if (task.m_type[fieldId] & MFX_FRAMETYPE_REF)
5579     {
5580         bool idrPicFlag = (task.m_type[fieldId] & MFX_FRAMETYPE_IDR) != 0;
5581         ReadDecRefPicMarking(reader, idrPicFlag);
5582         WriteDecRefPicMarking(writer, task.m_decRefPicMrk[fieldId], idrPicFlag);
5583     }
5584 
5585     if (extPps.entropyCodingModeFlag && (sliceType % 5 != 2))
5586         writer.PutUe(reader.GetUe());                           // cabac_init_idc
5587 
5588     writer.PutSe(reader.GetSe());                               // slice_qp_delta
5589 
5590     if (1/*deblocking_filter_control_present_flag*/)
5591     {
5592         writer.PutUe(tmp = reader.GetUe());                     // disable_deblocking_filter_idc
5593         if (tmp != 1)
5594         {
5595             writer.PutSe(reader.GetSe());                       // slice_alpha_c0_offset_div2
5596             writer.PutSe(reader.GetSe());                       // slice_beta_offset_div2
5597         }
5598     }
5599 
5600     if (extPps.entropyCodingModeFlag)
5601     {
5602         while (reader.NumBitsRead() % 8)
5603             reader.GetBit();                                    // cabac_alignment_one_bit
5604 
5605         mfxU32 numAlignmentBits = (8 - (writer.GetNumBits() & 0x7)) & 0x7;
5606         writer.PutBits(0xff, numAlignmentBits);
5607 
5608         sbegin += reader.NumBitsRead() / 8;
5609         dbegin += writer.GetNumBits() / 8;
5610 
5611         MFX_INTERNAL_CPY(dbegin, sbegin, (uint32_t)(send - sbegin));
5612         dbegin += send - sbegin;
5613     }
5614     else
5615     {
5616         mfxU32 bitsLeft = reader.NumBitsLeft();
5617 
5618         for (; bitsLeft > 31; bitsLeft -= 32)
5619             writer.PutBits(reader.GetBits(32), 32);
5620 
5621         writer.PutBits(reader.GetBits(bitsLeft), bitsLeft);
5622         writer.PutBits(0, (8 - (writer.GetNumBits() & 7)) & 7); // trailing zeroes
5623 
5624         assert((reader.NumBitsRead() & 7) == 0);
5625         assert((writer.GetNumBits()  & 7) == 0);
5626 
5627         sbegin += (reader.NumBitsRead() + 7) / 8;
5628         dbegin += (writer.GetNumBits()  + 7) / 8;
5629     }
5630 
5631     return dbegin;
5632 }
5633 
5634 namespace
5635 {
SelectPicTimingSei(MfxVideoParam const & video,DdiTask const & task)5636     mfxExtPictureTimingSEI const * SelectPicTimingSei(
5637         MfxVideoParam const & video,
5638         DdiTask const &       task)
5639     {
5640         if (mfxExtPictureTimingSEI const * extPt = GetExtBuffer(task.m_ctrl))
5641         {
5642             return extPt;
5643         }
5644         else
5645         {
5646             mfxExtPictureTimingSEI const * extPtGlobal = GetExtBuffer(video);
5647             return extPtGlobal;
5648         }
5649     }
5650 };
5651 
PrepareSeiMessageBuffer(MfxVideoParam const & video,DdiTask const & task,mfxU32 fieldId,PreAllocatedVector & sei)5652 void MfxHwH264Encode::PrepareSeiMessageBuffer(
5653     MfxVideoParam const & video,
5654     DdiTask const &       task,
5655     mfxU32                fieldId,
5656     PreAllocatedVector &  sei)
5657 {
5658     mfxExtCodingOption const     & extOpt  = GetExtBufferRef(video);
5659     mfxExtSpsHeader const        & extSps  = GetExtBufferRef(video);
5660     mfxExtCodingOption2 const    & extOpt2 = GetExtBufferRef(video);
5661     mfxExtPictureTimingSEI const * extPt   = SelectPicTimingSei(video, task);
5662 
5663     mfxU32 fillerSize         = task.m_fillerSize[fieldId];
5664     mfxU32 fieldPicFlag       = (task.GetPicStructForEncode() != MFX_PICSTRUCT_PROGRESSIVE);
5665     mfxU32 secondFieldPicFlag = (task.GetFirstField() != fieldId);
5666     mfxU32 idrPicFlag         = (task.m_type[fieldId] & MFX_FRAMETYPE_IDR);
5667     mfxU32 isIPicture         = (task.m_type[fieldId] & MFX_FRAMETYPE_I);
5668     mfxU32 recoveryPoint      = IsRecoveryPointSeiMessagePresent(
5669         task.m_ctrl.Payload,
5670         task.m_ctrl.NumPayload,
5671         GetPayloadLayout(fieldPicFlag, secondFieldPicFlag));
5672 
5673     mfxU32 needRecoveryPointSei = (extOpt.RecoveryPointSEI == MFX_CODINGOPTION_ON &&
5674         ((extOpt2.IntRefType && task.m_IRState.firstFrameInCycle && task.m_IRState.IntraLocation == 0) ||
5675         (extOpt2.IntRefType == 0 && isIPicture)));
5676 
5677     mfxU32 needCpbRemovalDelay = idrPicFlag || recoveryPoint || needRecoveryPointSei ||
5678         (isIPicture && extOpt2.BufferingPeriodSEI == MFX_BPSEI_IFRAME);
5679 
5680     mfxU32 needMarkingRepetitionSei =
5681         IsOn(extOpt.RefPicMarkRep) && task.m_decRefPicMrkRep[fieldId].presentFlag;
5682 
5683     mfxU32 needBufferingPeriod =
5684         (IsOn(extOpt.VuiNalHrdParameters) && needCpbRemovalDelay) ||
5685         (IsOn(extOpt.VuiVclHrdParameters) && needCpbRemovalDelay) ||
5686         (IsOn(extOpt.PicTimingSEI) &&
5687         (idrPicFlag || (isIPicture && extOpt2.BufferingPeriodSEI == MFX_BPSEI_IFRAME))); // to activate sps
5688 
5689     mfxU32 needPicTimingSei =
5690         IsOn(extOpt.VuiNalHrdParameters) ||
5691         IsOn(extOpt.VuiVclHrdParameters) ||
5692         IsOn(extOpt.PicTimingSEI);
5693 
5694     if (video.calcParam.cqpHrdMode)
5695         needBufferingPeriod = needPicTimingSei = 0; // in CQP HRD mode application inserts BP and PT SEI itself
5696 
5697     mfxU32 needAtLeastOneSei =
5698         (task.m_ctrl.NumPayload > secondFieldPicFlag && task.m_ctrl.Payload != nullptr && task.m_ctrl.Payload[secondFieldPicFlag] != nullptr) ||
5699         (fillerSize > 0)    ||
5700         needBufferingPeriod ||
5701         needPicTimingSei    ||
5702         needMarkingRepetitionSei;
5703 
5704     OutputBitstream writer(sei.Buffer(), sei.Capacity());
5705 
5706     mfxU8 const SEI_STARTCODE[5] = { 0, 0, 0, 1, 6 };
5707     if (needAtLeastOneSei && IsOn(extOpt.SingleSeiNalUnit))
5708         writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5709 
5710     mfxExtAvcSeiBufferingPeriod msgBufferingPeriod = {};
5711     {
5712         mfxExtAvcSeiPicTiming msgPicTiming;
5713 
5714         mfxU32 sps_id = extSps.seqParameterSetId;
5715         sps_id = ((sps_id + !!task.m_viewIdx) & 0x1f);  // use appropriate sps id for dependent views
5716 
5717         if (needBufferingPeriod)
5718         {
5719             PrepareSeiMessage(
5720                 task,
5721                 IsOn(extOpt.VuiNalHrdParameters),
5722                 IsOn(extOpt.VuiVclHrdParameters),
5723                 sps_id,
5724                 msgBufferingPeriod);
5725 
5726             if (IsOff(extOpt.SingleSeiNalUnit))
5727                 writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5728             PutSeiMessage(writer, msgBufferingPeriod);
5729             if (IsOff(extOpt.SingleSeiNalUnit))
5730                 writer.PutTrailingBits();
5731         }
5732 
5733         if (needPicTimingSei)
5734         {
5735             PrepareSeiMessage(
5736                 task,
5737                 fieldId,
5738                 IsOn(extOpt.VuiNalHrdParameters) || IsOn(extOpt.VuiVclHrdParameters),
5739                 msgPicTiming);
5740 
5741             if (IsOff(extOpt.SingleSeiNalUnit))
5742                 writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5743             PutSeiMessage(writer, *extPt, msgPicTiming);
5744             if (IsOff(extOpt.SingleSeiNalUnit))
5745                 writer.PutTrailingBits();
5746         }
5747     }
5748     // user-defined messages
5749     if (task.m_ctrl.Payload != nullptr)
5750     {
5751         for (mfxU32 i = secondFieldPicFlag; i < task.m_ctrl.NumPayload; i += 1 + fieldPicFlag)
5752         {
5753             if (task.m_ctrl.Payload[i] != nullptr)
5754             {
5755                 if (IsOff(extOpt.SingleSeiNalUnit))
5756                     writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5757                 for (mfxU32 b = 0; b < task.m_ctrl.Payload[i]->NumBit / 8; b++)
5758                     writer.PutBits(task.m_ctrl.Payload[i]->Data[b], 8);
5759                 if (IsOff(extOpt.SingleSeiNalUnit))
5760                     writer.PutTrailingBits();
5761             }
5762         }
5763     }
5764 
5765     if (needMarkingRepetitionSei)
5766     {
5767         mfxU8 frameMbsOnlyFlag = (video.mfx.FrameInfo.PicStruct == MFX_PICSTRUCT_PROGRESSIVE) ? 1 : 0;
5768 
5769         mfxExtAvcSeiDecRefPicMrkRep decRefPicMrkRep;
5770         PrepareSeiMessage(task, fieldId, frameMbsOnlyFlag, decRefPicMrkRep);
5771 
5772         if (IsOff(extOpt.SingleSeiNalUnit))
5773             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5774         PutSeiMessage(writer, decRefPicMrkRep);
5775         if (IsOff(extOpt.SingleSeiNalUnit))
5776             writer.PutTrailingBits();
5777     }
5778 
5779     if (needRecoveryPointSei)
5780     {
5781         mfxExtAvcSeiRecPoint msgPicTiming;
5782         PrepareSeiMessage(video, msgPicTiming);
5783         if (IsOff(extOpt.SingleSeiNalUnit))
5784             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5785         PutSeiMessage(writer, msgPicTiming);
5786         if (IsOff(extOpt.SingleSeiNalUnit))
5787             writer.PutTrailingBits();
5788     }
5789 
5790     if (fillerSize > 0)
5791     {
5792         // how many bytes takes to encode payloadSize depends on size of sei message
5793         // need to compensate it
5794         fillerSize -= fillerSize / 256;
5795         if (IsOff(extOpt.SingleSeiNalUnit))
5796             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5797         PutSeiHeader(writer, SEI_TYPE_FILLER_PAYLOAD, fillerSize);
5798         writer.PutFillerBytes(0xff, fillerSize);
5799         if (IsOff(extOpt.SingleSeiNalUnit))
5800             writer.PutTrailingBits();
5801     }
5802 
5803     if (needAtLeastOneSei && IsOn(extOpt.SingleSeiNalUnit))
5804         writer.PutTrailingBits();
5805 
5806     // add repack compensation to the end of last sei NALu.
5807     // It's padding done with trailing_zero_8bits. This padding could has greater size then real repack overhead.
5808     if (task.m_addRepackSize[fieldId] && needAtLeastOneSei)
5809         writer.PutFillerBytes(0xff, task.m_addRepackSize[fieldId]);
5810 
5811     sei.SetSize(writer.GetNumBits() / 8);
5812 }
5813 
5814 // MVC BD {
PrepareSeiMessageBufferDepView(MfxVideoParam const & video,DdiTask const & task,mfxU32 fieldId,PreAllocatedVector & sei)5815 void MfxHwH264Encode::PrepareSeiMessageBufferDepView(
5816     MfxVideoParam const & video,
5817     DdiTask const &       task,
5818     mfxU32                fieldId,
5819     PreAllocatedVector &  sei)
5820 {
5821     mfxExtCodingOption const     & extOpt = GetExtBufferRef(video);
5822     mfxExtSpsHeader const        & extSps = GetExtBufferRef(video);
5823     mfxExtPictureTimingSEI const * extPt  = SelectPicTimingSei(video, task);
5824 
5825     mfxU32 fillerSize         = task.m_fillerSize[fieldId];
5826     mfxU32 fieldPicFlag       = (task.GetPicStructForEncode() != MFX_PICSTRUCT_PROGRESSIVE);
5827     mfxU32 secondFieldPicFlag = (task.GetFirstField() != fieldId);
5828     mfxU32 idrPicFlag         = (task.m_type[fieldId] & MFX_FRAMETYPE_IDR);
5829     mfxU32 recoveryPoint      = IsRecoveryPointSeiMessagePresent(
5830         task.m_ctrl.Payload,
5831         task.m_ctrl.NumPayload,
5832         GetPayloadLayout(fieldPicFlag, secondFieldPicFlag));
5833 
5834     mfxU32 needCpbRemovalDelay = idrPicFlag || recoveryPoint;
5835 
5836     mfxU32 needMarkingRepetitionSei =
5837         IsOn(extOpt.RefPicMarkRep) && task.m_decRefPicMrkRep[fieldId].presentFlag;
5838 
5839     mfxU32 needBufferingPeriod =
5840         (IsOn(extOpt.VuiNalHrdParameters) && needCpbRemovalDelay) ||
5841         (IsOn(extOpt.VuiVclHrdParameters) && needCpbRemovalDelay) ||
5842         (IsOn(extOpt.PicTimingSEI)        && idrPicFlag); // to activate sps
5843 
5844     mfxU32 needPicTimingSei =
5845         IsOn(extOpt.VuiNalHrdParameters) ||
5846         IsOn(extOpt.VuiVclHrdParameters) ||
5847         IsOn(extOpt.PicTimingSEI);
5848 
5849     mfxU32 needMvcNestingSei = needBufferingPeriod || needPicTimingSei;
5850     // for BD/AVCHD compatible encoding filler SEI should have MVC nesting wrapper
5851     if (IsOn(extOpt.ViewOutput))
5852         needMvcNestingSei |= (fillerSize != 0);
5853 
5854     mfxU32 needNotNestingSei =
5855         (task.m_ctrl.Payload && task.m_ctrl.NumPayload > 0) ||
5856         (fillerSize > 0 && IsOff(extOpt.ViewOutput)) ||
5857         needMarkingRepetitionSei;
5858 
5859     OutputBitstream writer(sei.Buffer(), sei.Capacity());
5860 
5861     mfxU8 const SEI_STARTCODE[5] = { 0, 0, 0, 1, 6 };
5862 
5863 // MVC BD {
5864     mfxExtAvcSeiBufferingPeriod msgBufferingPeriod = {};
5865     mfxExtAvcSeiPicTiming msgPicTiming;
5866     mfxU32 sps_id = extSps.seqParameterSetId;
5867     sps_id = ((sps_id + !!task.m_viewIdx) & 0x1f);  // use appropriate sps id for dependent views
5868 // MVC BD }
5869 
5870     if (needMvcNestingSei)
5871     {
5872         if (needBufferingPeriod)
5873         {
5874             PrepareSeiMessage(
5875                 task,
5876                 IsOn(extOpt.VuiNalHrdParameters),
5877                 IsOn(extOpt.VuiVclHrdParameters),
5878                 sps_id,
5879                 msgBufferingPeriod);
5880 
5881             // write NAL unit with MVC nesting SEI for BP
5882             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5883             PutSeiMessage(writer, needBufferingPeriod, 0, 0, video, msgBufferingPeriod, *extPt, msgPicTiming);
5884             writer.PutTrailingBits();
5885         }
5886 
5887         if (needPicTimingSei)
5888         {
5889             PrepareSeiMessage(
5890                 task,
5891                 fieldId,
5892                 IsOn(extOpt.VuiNalHrdParameters) || IsOn(extOpt.VuiVclHrdParameters),
5893                 msgPicTiming);
5894 
5895             // write NAL unit with MVC nesting SEI for PT
5896             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5897             PutSeiMessage(writer, 0, needPicTimingSei, 0, video, msgBufferingPeriod, *extPt, msgPicTiming);
5898             writer.PutTrailingBits();
5899         }
5900 
5901         if (fillerSize)
5902         {
5903             // write NAL unit with MVC nesting SEI for filler
5904             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5905             PutSeiMessage(writer, 0, 0, fillerSize, video, msgBufferingPeriod, *extPt, msgPicTiming);
5906             writer.PutTrailingBits();
5907         }
5908     }
5909 
5910     if (needNotNestingSei && IsOn(extOpt.SingleSeiNalUnit))
5911         writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5912 
5913     // user-defined messages
5914     if (task.m_ctrl.Payload != nullptr)
5915     {
5916         for (mfxU32 i = secondFieldPicFlag; i < task.m_ctrl.NumPayload; i += 1 + fieldPicFlag)
5917         {
5918             if (task.m_ctrl.Payload[i] != nullptr)
5919             {
5920                 if (IsOff(extOpt.SingleSeiNalUnit))
5921                     writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5922                 for (mfxU32 b = 0; b < task.m_ctrl.Payload[i]->NumBit / 8; b++)
5923                     writer.PutBits(task.m_ctrl.Payload[i]->Data[b], 8);
5924                 if (IsOff(extOpt.SingleSeiNalUnit))
5925                     writer.PutTrailingBits();
5926             }
5927         }
5928     }
5929 
5930     if (needMarkingRepetitionSei)
5931     {
5932         mfxU8 frameMbsOnlyFlag = (video.mfx.FrameInfo.PicStruct == MFX_PICSTRUCT_PROGRESSIVE) ? 1 : 0;
5933 
5934         mfxExtAvcSeiDecRefPicMrkRep decRefPicMrkRep;
5935         PrepareSeiMessage(task, fieldId, frameMbsOnlyFlag, decRefPicMrkRep);
5936 
5937         if (IsOff(extOpt.SingleSeiNalUnit))
5938             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5939         PutSeiMessage(writer, decRefPicMrkRep);
5940         if (IsOff(extOpt.SingleSeiNalUnit))
5941             writer.PutTrailingBits();
5942     }
5943 
5944     if (fillerSize > 0 && IsOff(extOpt.ViewOutput))
5945     {
5946         // how many bytes takes to encode payloadSize depends on size of sei message
5947         // need to compensate it
5948         fillerSize -= fillerSize / 256;
5949         if (IsOff(extOpt.SingleSeiNalUnit))
5950             writer.PutRawBytes(SEI_STARTCODE, SEI_STARTCODE + sizeof(SEI_STARTCODE));
5951         PutSeiHeader(writer, SEI_TYPE_FILLER_PAYLOAD, fillerSize);
5952         writer.PutFillerBytes(0xff, fillerSize);
5953         if (IsOff(extOpt.SingleSeiNalUnit))
5954             writer.PutTrailingBits();
5955     }
5956 
5957     if (needNotNestingSei && IsOn(extOpt.SingleSeiNalUnit))
5958         writer.PutTrailingBits();
5959 
5960     // w/a for SNB/IVB: padd sei buffer to compensate re-pack  of AVC headers to MVC
5961     // add repack compensation to the end of last sei NALu.
5962     if (needMvcNestingSei && task.m_addRepackSize[fieldId])
5963         writer.PutFillerBytes(0xff, task.m_addRepackSize[fieldId]);
5964 
5965     sei.SetSize(writer.GetNumBits() / 8);
5966 
5967 }
5968 // MVC BD }
5969 
IsInplacePatchNeeded(MfxVideoParam const & par,DdiTask const & task,mfxU32 fieldId)5970 bool MfxHwH264Encode::IsInplacePatchNeeded(
5971     MfxVideoParam const & par,
5972     DdiTask const &       task,
5973     mfxU32                fieldId)
5974 {
5975     mfxExtSpsHeader const & extSps = GetExtBufferRef(par);
5976     mfxExtPpsHeader const & extPps = GetExtBufferRef(par);
5977 
5978     mfxU8 constraintFlags =
5979         (extSps.constraints.set0 << 7) | (extSps.constraints.set1 << 6) |
5980         (extSps.constraints.set2 << 5) | (extSps.constraints.set3 << 4) |
5981         (extSps.constraints.set4 << 3) | (extSps.constraints.set5 << 2) |
5982         (extSps.constraints.set6 << 1) | (extSps.constraints.set7 << 0);
5983 
5984     if (task.m_nalRefIdc[fieldId] > 1)
5985         return true;
5986 
5987     return
5988         constraintFlags  != 0 ||
5989         extSps.nalRefIdc != 1 ||
5990         extPps.nalRefIdc != 1 ||
5991         extSps.gapsInFrameNumValueAllowedFlag == 1 ||
5992         (extSps.maxNumRefFrames & 1);
5993 }
5994 
IsSlicePatchNeeded(DdiTask const & task,mfxU32 fieldId)5995 bool MfxHwH264Encode::IsSlicePatchNeeded(
5996     DdiTask const & task,
5997     mfxU32          fieldId)
5998 {
5999     for (mfxU32 i = 0; i < task.m_refPicList0Mod[fieldId].Size(); i++)
6000         if (task.m_refPicList0Mod[fieldId][i].m_idc == RPLM_LT_PICNUM)
6001             return true;    // driver doesn't write reordering syntax for long term reference pictures
6002 
6003     for (mfxU32 i = 0; i < task.m_list0[fieldId].Size(); i++)
6004         if (task.m_dpb[fieldId][task.m_list0[fieldId][i] & 0x7f].m_longterm)
6005             return true; // driver insert incorrect reordering syntax when longterm ref present
6006 
6007     for (mfxU32 i = 0; i < task.m_list1[fieldId].Size(); i++)
6008         if (task.m_dpb[fieldId][task.m_list1[fieldId][i] & 0x7f].m_longterm)
6009             return true; // driver insert incorrect reordering syntax when longterm ref present
6010 
6011     bool list0ModifiedAndShortened =
6012         task.m_refPicList0Mod[fieldId].Size() > 0 &&
6013         task.m_initSizeList0[fieldId] != task.m_list0[fieldId].Size();
6014 
6015     return
6016         task.m_refPicList1Mod[fieldId].Size() > 0               || // driver doesn't write reordering syntax for List1
6017         list0ModifiedAndShortened                               || // driver doesn't write correct reordering syntax
6018                                                                    // when num_ref_idx_l0_active is different from inital
6019         task.m_decRefPicMrk[fieldId].mmco.Size() > 0            || // driver doesn't write dec_ref_pic_marking syntax
6020         task.m_decRefPicMrk[fieldId].long_term_reference_flag;     // even for idr frames
6021 }
6022 
CopyBitstream(VideoCORE & core,MfxVideoParam const & video,DdiTask const & task,mfxU32 fieldId,mfxU8 * bsData,mfxU32 bsSizeAvail)6023 mfxStatus  MfxHwH264Encode::CopyBitstream(VideoCORE           & core,
6024                                           MfxVideoParam const & video,
6025                                           DdiTask const       & task,
6026                                           mfxU32              fieldId,
6027                                           mfxU8 *             bsData,
6028                                           mfxU32              bsSizeAvail)
6029 {
6030     mfxFrameData bitstream = {};
6031 
6032     FrameLocker lock(&core, bitstream, task.m_midBit[fieldId]);
6033     MFX_CHECK(video.Protected == 0 || task.m_notProtected, MFX_ERR_UNDEFINED_BEHAVIOR);
6034     if (bitstream.Y == 0)
6035         return Error(MFX_ERR_LOCK_MEMORY);
6036     mfxU32   bsSizeToCopy  = task.m_bsDataLength[fieldId];
6037     if (bsSizeToCopy > bsSizeAvail)
6038         return Error(MFX_ERR_UNDEFINED_BEHAVIOR);
6039     FastCopyBufferVid2Sys(bsData, bitstream.Y, bsSizeToCopy);
6040     return MFX_ERR_NONE;
6041 }
UpdateSliceInfo(mfxU8 * sbegin,mfxU8 * send,mfxU32 maxSliceSize,DdiTask & task,bool & bRecoding)6042 mfxStatus MfxHwH264Encode::UpdateSliceInfo(
6043         mfxU8 *               sbegin, // contents of source buffer may be modified
6044         mfxU8 *               send,
6045         mfxU32                maxSliceSize,
6046         DdiTask &             task,
6047         bool&                 bRecoding)
6048 {
6049     mfxU32 num = 0;
6050     for (NaluIterator nalu(sbegin, send); nalu != NaluIterator(); ++nalu)
6051     {
6052         if (nalu->type == 1 || nalu->type == 5)
6053         {
6054             mfxF32 slice_len = (mfxF32) (nalu->end - nalu->begin);
6055             mfxF32 weight = (slice_len*100)/maxSliceSize;
6056             task.m_SliceInfo[num].weight = weight ;
6057             if (weight > 100)
6058                 bRecoding = true;
6059             //printf ("%d \t slice len\t%f\t%f\n", num, slice_len, task.m_SliceInfo[num].weight);
6060             num++;
6061         }
6062     }
6063    if (task.m_repack == 0 && !bRecoding)
6064    {
6065        if (num > 4)
6066        {
6067            mfxF32 weight_avg = 0;
6068            for (mfxU32 i = 0; i < num; i ++)
6069            {
6070                weight_avg += task.m_SliceInfo[i].weight;
6071            }
6072            weight_avg = weight_avg/(mfxF32)num;
6073            bRecoding = (weight_avg < 25);
6074            //if (bRecoding)
6075           //{
6076                 //printf("short slices %d, w=%f\n", num, weight_avg);
6077           //}
6078        }
6079    }
6080    return (task.m_SliceInfo.size()!= num)? MFX_ERR_UNDEFINED_BEHAVIOR : MFX_ERR_NONE;
6081 }
PatchBitstream(MfxVideoParam const & video,DdiTask const & task,mfxU32 fieldId,mfxU8 * sbegin,mfxU8 * send,mfxU8 * dbegin,mfxU8 * dend)6082 mfxU8 * MfxHwH264Encode::PatchBitstream(
6083     MfxVideoParam const & video,
6084     DdiTask const &       task,
6085     mfxU32                fieldId,
6086     mfxU8 *               sbegin, // contents of source buffer may be modified
6087     mfxU8 *               send,
6088     mfxU8 *               dbegin,
6089     mfxU8 *               dend)
6090 {
6091     mfxExtSpsHeader const & extSps = GetExtBufferRef(video);
6092     mfxExtPpsHeader const & extPps = GetExtBufferRef(video);
6093 
6094     mfxU8 constraintFlags =
6095         (extSps.constraints.set0 << 7) | (extSps.constraints.set1 << 6) |
6096         (extSps.constraints.set2 << 5) | (extSps.constraints.set3 << 4) |
6097         (extSps.constraints.set4 << 3) | (extSps.constraints.set5 << 2) |
6098         (extSps.constraints.set6 << 1) | (extSps.constraints.set7 << 0);
6099 
6100     bool copy = (sbegin != dbegin);
6101 
6102     bool prefixNalUnitNeeded = video.calcParam.numTemporalLayer > 0;
6103 
6104     bool slicePatchNeeded = copy && IsSlicePatchNeeded(task, fieldId);
6105     assert(copy || !IsSlicePatchNeeded(task, fieldId) || !"slice patching requries intermediate bitstream buffer");
6106 
6107     bool spsPresent = false;
6108 
6109     for (NaluIterator nalu(sbegin, send); nalu != NaluIterator(); ++nalu)
6110     {
6111         if (nalu->type == 7)
6112         {
6113             mfxU8 * spsBegin = dbegin;
6114             if (extSps.gapsInFrameNumValueAllowedFlag || (extSps.maxNumRefFrames & 1))
6115             {
6116                 assert(copy);
6117                 InputBitstream reader(nalu->begin + nalu->numZero + 1, nalu->end);
6118                 mfxExtSpsHeader spsHead = { };
6119                 ReadSpsHeader(reader, spsHead);
6120 
6121                 spsHead.gapsInFrameNumValueAllowedFlag = extSps.gapsInFrameNumValueAllowedFlag;
6122                 spsHead.maxNumRefFrames                = extSps.maxNumRefFrames;
6123 
6124                 OutputBitstream writer(dbegin, dend);
6125                 dbegin += WriteSpsHeader(writer, spsHead) / 8;
6126             }
6127             else
6128             {
6129                 dbegin = copy
6130                     ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6131                     : nalu->end;
6132             }
6133 
6134             // snb and ivb driver doesn't provide controls for nal_ref_idc
6135             // if nal_ref_idc from mfxExtCodingOptionSPSPPS differs from value hardcoded in driver (1)
6136             // it needs to be patched
6137             spsBegin[nalu->numZero + 1] &= ~0x30;
6138             spsBegin[nalu->numZero + 1] |= extSps.nalRefIdc << 5;
6139 
6140             // snb and ivb driver doesn't provide controls for constraint flags in sps header
6141             // if any of them were set via mfxExtCodingOptionSPSPPS
6142             // sps header generated by driver needs to be patched
6143             // such patching doesn't change length of header
6144             spsBegin[nalu->numZero + 3] = constraintFlags;
6145             spsPresent = true;
6146         }
6147         else if (nalu->type == 8)
6148         {
6149             if (spsPresent ||               // pps always accompanies sps
6150                 !video.calcParam.tempScalabilityMode)   // mfxExtAvcTemporalLayers buffer is not present, pps to every frame
6151             {
6152                 mfxU8 * ppsBegin = dbegin;
6153                 dbegin = copy
6154                     ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6155                     : nalu->end;
6156 
6157                 // snb and ivb driver doesn't provide controls for nal_ref_idc
6158                 // if nal_ref_idc from mfxExtCodingOptionSPSPPS differs from value hardcoded in driver (1)
6159                 // it needs to be patched
6160                 ppsBegin[nalu->numZero + 1] &= ~0x30;
6161                 ppsBegin[nalu->numZero + 1] |= extPps.nalRefIdc << 5;
6162             }
6163         }
6164         else if (nalu->type == 9)
6165         {
6166             if (!video.calcParam.tempScalabilityMode) // mfxExtAvcTemporalLayers buffer is not present, aud to every frame
6167             {
6168                 dbegin = copy
6169                     ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6170                     : nalu->end;
6171             }
6172         }
6173         else if (nalu->type == 1 || nalu->type == 5)
6174         {
6175             if (task.m_nalRefIdc[fieldId] > 1)
6176             {
6177                 nalu->begin[nalu->numZero + 1] &= ~0x30;
6178                 nalu->begin[nalu->numZero + 1] |= task.m_nalRefIdc[fieldId] << 5;
6179             }
6180 
6181             if (prefixNalUnitNeeded)
6182             {
6183                 dbegin = PackPrefixNalUnitSvc(dbegin, dend, true, task, fieldId);
6184             }
6185 
6186             if (slicePatchNeeded)
6187             {
6188                 assert(copy || !"slice patching requries intermediate bitstream buffer");
6189                 dbegin = CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->begin + nalu->numZero + 2);
6190                 dbegin = RePackSlice(dbegin, dend, nalu->begin + nalu->numZero + 2, nalu->end, video, task, fieldId);
6191             }
6192             else
6193             {
6194                 dbegin = copy
6195                     ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6196                     : nalu->end;
6197             }
6198         }
6199         else
6200         {
6201             dbegin = copy
6202                 ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6203                 : nalu->end;
6204         }
6205     }
6206 
6207     return dbegin;
6208 }
6209 
InsertSVCNAL(DdiTask const & task,mfxU32 fieldId,mfxU8 * sbegin,mfxU8 * send,mfxU8 * dbegin,mfxU8 * dend)6210 mfxU8 * MfxHwH264Encode::InsertSVCNAL(
6211     DdiTask const &       task,
6212     mfxU32                fieldId,
6213     mfxU8 *               sbegin, // contents of source buffer may be modified
6214     mfxU8 *               send,
6215     mfxU8 *               dbegin,
6216     mfxU8 *               dend)
6217 {
6218 
6219     bool copy = (sbegin != dbegin);
6220 
6221     for (NaluIterator nalu(sbegin, send); nalu != NaluIterator(); ++nalu)
6222     {
6223         if (nalu->type == 1 || nalu->type == 5)
6224         {
6225 
6226             dbegin = PackPrefixNalUnitSvc(dbegin, dend, true, task, fieldId);
6227             dbegin = copy
6228                     ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6229                     : nalu->end;
6230         }
6231         else
6232         {
6233             dbegin = copy
6234                 ? CheckedMFX_INTERNAL_CPY(dbegin, dend, nalu->begin, nalu->end)
6235                 : nalu->end;
6236         }
6237     }
6238 
6239     return dbegin;
6240 }
6241 
6242 namespace
6243 {
CalcDistScaleFactor(mfxU32 pocCur,mfxU32 pocL0,mfxU32 pocL1)6244     mfxI32 CalcDistScaleFactor(
6245         mfxU32 pocCur,
6246         mfxU32 pocL0,
6247         mfxU32 pocL1)
6248     {
6249         mfxI32 tb = mfx::clamp(mfxI32(pocCur - pocL0), -128, 127);
6250         mfxI32 td = mfx::clamp(mfxI32(pocL1  - pocL0), -128, 127);
6251         mfxI32 tx = (16384 + abs(td/2)) / td;
6252 
6253         return mfx::clamp((tb * tx + 32) >> 6, -1024, 1023);
6254     }
6255 
CalcDistScaleFactor(DdiTask const & task,mfxU32 indexL0,mfxU32 indexL1)6256     mfxI32 CalcDistScaleFactor(
6257         DdiTask const & task,
6258         mfxU32 indexL0,
6259         mfxU32 indexL1)
6260     {
6261         if (indexL0 >= task.m_list0[0].Size() ||
6262             indexL1 >= task.m_list1[0].Size())
6263             return 128;
6264         mfxU32 pocL0 = task.m_dpb[0][task.m_list0[0][indexL0] & 127].m_poc[0];
6265         mfxU32 pocL1 = task.m_dpb[0][task.m_list1[0][indexL1] & 127].m_poc[0];
6266         if (pocL0 == pocL1)
6267             return 128;
6268         return CalcDistScaleFactor(task.GetPoc(0), pocL0, pocL1);
6269     }
GetMBCost(DdiTask const & task,mfxU32 nMB,mfxU32 widthMB,mfxU32 heightMB,mfxU32 widthVME,mfxU32 heightVME)6270     mfxU32 GetMBCost(DdiTask const & task, mfxU32 nMB, mfxU32 widthMB, mfxU32 heightMB, mfxU32 widthVME, mfxU32 heightVME)
6271     {
6272         mfxU32 xVME =  (mfxU32)((mfxF32)(nMB%widthMB) / ((mfxF32)widthMB/(mfxF32)widthVME));
6273         mfxU32 yVME =  (mfxU32)((mfxF32)(nMB/widthMB) / ((mfxF32)heightMB/(mfxF32)heightVME));
6274 
6275         mfxU32 mbCost = task.m_vmeData->mb[yVME*widthVME + xVME].dist;
6276         /* if (!task.m_vmeData->mb[nMB].intraMbFlag)
6277         {
6278             mbCost = task.m_cqpValue[0] < GetSkippedQp(task.m_vmeData->mb[nMB]) ? mbCost : 0;
6279         } */
6280         mbCost = mbCost > 0 ? mbCost : 1;
6281         return mbCost;
6282     }
6283 };
6284 
CalcBiWeight(DdiTask const & task,mfxU32 indexL0,mfxU32 indexL1)6285 mfxU32 MfxHwH264Encode::CalcBiWeight(
6286     DdiTask const & task,
6287     mfxU32 indexL0,
6288     mfxU32 indexL1)
6289 {
6290     mfxI32 biWeight = CalcDistScaleFactor(task, indexL0, indexL1) >> 2;
6291     return biWeight < -64 || biWeight > 128
6292         ? 32
6293         : biWeight;
6294 }
6295 
CreateBrc(MfxVideoParam const & video,MFX_ENCODE_CAPS const & hwCaps)6296 BrcIface * MfxHwH264Encode::CreateBrc(MfxVideoParam const & video, MFX_ENCODE_CAPS const & hwCaps)
6297 {
6298     mfxExtCodingOption2 const & ext = GetExtBufferRef(video);
6299 
6300     if (ext.MaxSliceSize && !IsDriverSliceSizeControlEnabled(video, hwCaps))
6301         return new UmcBrc;
6302 
6303     switch (video.mfx.RateControlMethod)
6304     {
6305     case MFX_RATECONTROL_LA:
6306     case MFX_RATECONTROL_LA_HRD: return new LookAheadBrc2;
6307     case MFX_RATECONTROL_LA_ICQ: return new LookAheadCrfBrc;
6308     case MFX_RATECONTROL_LA_EXT: return new VMEBrc;
6309 
6310     case MFX_RATECONTROL_CBR:
6311     case MFX_RATECONTROL_VBR:
6312         return new H264SWBRC;
6313     default: return new UmcBrc;
6314     }
6315 }
6316 
AddEmulationPreventionAndCopy(mfxU8 * sbegin,mfxU8 * send,mfxU8 * dbegin,mfxU8 * dend)6317 mfxU8 * MfxHwH264Encode::AddEmulationPreventionAndCopy(
6318     mfxU8 *               sbegin,
6319     mfxU8 *               send,
6320     mfxU8 *               dbegin,
6321     mfxU8 *               dend)
6322 {
6323     mfxU32 zeroCount = 0;
6324     mfxU8 * write = dbegin;
6325     for (mfxU8 * read = sbegin; read != send; ++read)
6326     {
6327         if (write > dend)
6328         {
6329             assert(0);
6330             throw EndOfBuffer();
6331         }
6332         if (zeroCount >= 2 && (*read & 0xfc) == 0)
6333         {
6334             *(write++) = 0x03;
6335             zeroCount = 0; // drop zero count
6336         }
6337         zeroCount = (*read == 0) ? zeroCount + 1 : 0;
6338         *(write++) = *read;
6339     }
6340     return write;
6341 }
FillSliceInfo(DdiTask & task,mfxU32 MaxSliceSize,mfxU32 FrameSize,mfxU32 widthLa,mfxU32 heightLa)6342 mfxStatus MfxHwH264Encode::FillSliceInfo(DdiTask &  task, mfxU32 MaxSliceSize, mfxU32 FrameSize, mfxU32 widthLa, mfxU32 heightLa)
6343 {
6344     if (MaxSliceSize == 0)  return MFX_ERR_NONE;
6345 
6346     mfxU32  numPics   = task.GetPicStructForEncode() == MFX_PICSTRUCT_PROGRESSIVE ? 1 : 2;
6347     mfxU32  numSlices = (FrameSize + MaxSliceSize-1)/MaxSliceSize;
6348     mfxU32  widthMB   =  task.m_yuv->Info.Width/16;
6349     mfxU32  heightMB  =  task.m_yuv->Info.Height/16;
6350     mfxU32  numMB = widthMB*heightMB;
6351 
6352     numSlices = (numSlices > 0)  ? numSlices : 1;
6353     numSlices = (numSlices > 255) ? 255 : numSlices;
6354 
6355 
6356     mfxU32  curMB = 0;
6357     mfxF32  maxSliceCost = 0.0;
6358     for (mfxU32 i = 0; i < numMB; i ++)
6359     {
6360        maxSliceCost = maxSliceCost + GetMBCost(task, i, widthMB, heightMB, widthLa/16, heightLa/16);
6361     }
6362     maxSliceCost = maxSliceCost/numSlices;
6363 
6364     task.m_SliceInfo.resize(numSlices);
6365     mfxU32 sliceCost = 0;
6366     mfxU32 numRealSlises = 0;
6367     mfxU32 prevCost = 0;
6368 
6369     for (size_t i = 0; i < task.m_SliceInfo.size(); ++i)
6370     {
6371         task.m_SliceInfo[i].startMB = curMB/numPics;
6372         mfxU32 numMBForSlice =  0;
6373         while (curMB < numMB)
6374         {
6375             mfxU32 mbCost = GetMBCost(task, curMB, widthMB, heightMB, widthLa/16, heightLa/16);
6376             if (((sliceCost + mbCost) > maxSliceCost * (i + 1)) && (numMBForSlice > 0) && (i < (task.m_SliceInfo.size() - 1)))
6377             {
6378                 break;
6379             }
6380             sliceCost = sliceCost  + mbCost;
6381             curMB ++;
6382             numMBForSlice ++;
6383         }
6384         task.m_SliceInfo[i].numMB  = numMBForSlice/numPics;
6385         task.m_SliceInfo[i].weight = 100;
6386         task.m_SliceInfo[i].cost =  sliceCost -prevCost;
6387         //printf("%d\t%d\n", i, task.m_SliceInfo[i].cost);
6388         prevCost = sliceCost;
6389         if (numMBForSlice) numRealSlises++;
6390     }
6391     if (numRealSlises != task.m_SliceInfo.size())
6392         task.m_SliceInfo.resize(numRealSlises);
6393 
6394     return MFX_ERR_NONE;
6395 }
CorrectSliceInfo(DdiTask & task,mfxU32 MaxSliceWeight,mfxU32 widthLa,mfxU32 heightLa)6396 mfxStatus MfxHwH264Encode::CorrectSliceInfo(DdiTask &  task, mfxU32  MaxSliceWeight, mfxU32 widthLa, mfxU32 heightLa)
6397 {
6398     if (task.m_SliceInfo.size() == 0)  return MFX_ERR_NONE;
6399 
6400     SliceStructInfo new_info[256] = {};
6401     mfxU32  new_slice = 0;
6402     mfxU32  curMB = 0;
6403     mfxU32  old_slice = 0;
6404     mfxU32  numPics   = task.GetPicStructForEncode() == MFX_PICSTRUCT_PROGRESSIVE ? 1 : 2;
6405 
6406     mfxU32  widthMB   =  task.m_yuv->Info.Width/16;
6407     mfxU32  heightMB  =  task.m_yuv->Info.Height/16;
6408     mfxU32  numMB = widthMB*heightMB;
6409 
6410 
6411     // Form new slices using VME MB data and real coded slice size
6412 
6413     for (;new_slice < 256; ++new_slice)
6414     {
6415         mfxF64  sliceWeight = 0.0;
6416         new_info[new_slice].startMB = curMB/numPics;
6417         mfxU32 numMBForSlice =  0;
6418         mfxU32 sliceCost = 0;
6419         while (curMB < numMB)
6420         {
6421             if (curMB >= task.m_SliceInfo[old_slice].startMB + task.m_SliceInfo[old_slice].numMB)
6422             {
6423                 old_slice ++;
6424             }
6425             mfxU32 mbCost = GetMBCost(task, curMB, widthMB, heightMB, widthLa/16, heightLa/16);
6426             mfxF64 mbWeight = (mfxF64) mbCost/task.m_SliceInfo[old_slice].cost*task.m_SliceInfo[old_slice].weight;
6427 
6428             if (((sliceWeight + mbWeight) > MaxSliceWeight) && (numMBForSlice > 0))
6429             {
6430                 break;
6431             }
6432             sliceWeight = sliceWeight  + mbWeight;
6433             sliceCost += mbCost;
6434             curMB ++;
6435             numMBForSlice ++;
6436         }
6437         new_info[new_slice].numMB  = numMBForSlice/numPics;
6438         new_info[new_slice].weight = 100;
6439         new_info[new_slice].cost = sliceCost;
6440         if (curMB >= numMB)
6441             break;
6442     }
6443     if (curMB < numMB)
6444         return Error(MFX_ERR_UNDEFINED_BEHAVIOR);
6445 
6446     task.m_SliceInfo.resize(new_slice + 1);
6447 
6448     for (size_t i = 0; i < task.m_SliceInfo.size(); i++)
6449     {
6450         task.m_SliceInfo[i] = new_info[i];
6451     }
6452     return MFX_ERR_NONE;
6453 }
CorrectSliceInfoForsed(DdiTask & task,mfxU32 widthLa,mfxU32 heightLa)6454 mfxStatus MfxHwH264Encode::CorrectSliceInfoForsed(DdiTask & task, mfxU32 widthLa, mfxU32 heightLa)
6455 {
6456     mfxU32 freeSlisesMax = task.m_SliceInfo.size() < 256 ? mfxU32(256 - task.m_SliceInfo.size()) : 0;
6457     if (!freeSlisesMax)
6458         return MFX_ERR_NONE;
6459 
6460     mfxU32 bigSlices[256] = {};
6461     mfxU32 numBigSlices = 0;
6462     SliceStructInfo new_info[256] = {};
6463 
6464     mfxU32  widthMB   =  task.m_yuv->Info.Width/16;
6465     mfxU32  heightMB  =  task.m_yuv->Info.Height/16;
6466 
6467 
6468     // sort big slices
6469     for (mfxU32 i = 0; i < (mfxU32)task.m_SliceInfo.size(); i++)
6470     {
6471         bigSlices[i] = i;
6472     }
6473     for (; numBigSlices < freeSlisesMax; numBigSlices++)
6474     {
6475         mfxF32 max_weight = 0;
6476         mfxU32 max_index = 0;
6477         for (size_t j = numBigSlices; j < task.m_SliceInfo.size(); j++)
6478         {
6479             if (max_weight < task.m_SliceInfo[bigSlices[j]].weight && task.m_SliceInfo[bigSlices[j]].numMB > 1)
6480             {
6481                 max_weight = task.m_SliceInfo[bigSlices[j]].weight;
6482                 max_index = (mfxU32)j;
6483             }
6484         }
6485         if (max_weight < 100)
6486             break;
6487 
6488         mfxU32 tmp = bigSlices[max_index] ;
6489         bigSlices[max_index] =bigSlices[numBigSlices];
6490         bigSlices[numBigSlices] = tmp;
6491     }
6492      mfxU32 numSlises = 0;
6493 
6494     // devide big slices
6495 
6496     for (mfxU32 i = 0; i < task.m_SliceInfo.size(); i++)
6497     {
6498         bool bBigSlice = false;
6499         for (mfxU32 j = 0; j < numBigSlices; j++)
6500         {
6501             if (bigSlices[j] == i)
6502             {
6503                 bBigSlice =  true;
6504                 break;
6505             }
6506         }
6507         if (bBigSlice)
6508         {
6509             new_info[numSlises].startMB = task.m_SliceInfo[i].startMB;
6510             new_info[numSlises].numMB = task.m_SliceInfo[i].numMB / 2;
6511             new_info[numSlises].cost = 0;
6512             for (mfxU32 n = new_info[numSlises].startMB; n < new_info[numSlises].startMB +  new_info[numSlises].numMB; n++)
6513             {
6514                 new_info[numSlises].cost += GetMBCost(task, n,  widthMB, heightMB, widthLa/16, heightLa/16);
6515             }
6516             numSlises ++;
6517             new_info[numSlises].startMB = new_info[numSlises - 1].startMB + new_info[numSlises - 1].numMB;
6518             new_info[numSlises].numMB =task.m_SliceInfo[i].numMB - new_info[numSlises - 1].numMB;
6519             new_info[numSlises].cost = 0;
6520             for (mfxU32 n = new_info[numSlises].startMB; n < new_info[numSlises].startMB +  new_info[numSlises].numMB; n++)
6521             {
6522                 new_info[numSlises].cost += GetMBCost(task, n, widthMB, heightMB, widthLa/16, heightLa/16);
6523             }
6524             numSlises ++;
6525         }
6526         else
6527         {
6528             new_info[numSlises ++] = task.m_SliceInfo[i];
6529         }
6530     }
6531     task.m_SliceInfo.resize(numSlises);
6532 
6533     for (size_t i = 0; i < task.m_SliceInfo.size(); i++)
6534     {
6535         task.m_SliceInfo[i] = new_info[i];
6536     }
6537     return MFX_ERR_NONE;
6538 }
6539 
6540 
6541 const mfxU8 rangeTabLPS[64][4] =
6542 {
6543     {   93 + 35 , 101 + 75 ,  19 + 189,  82 + 158, },
6544     {   82 + 46 , 145 + 22 , 193 + 4  ,  29 + 198, },
6545     {    5 + 123, 107 + 51 , 152 + 35 ,  72 + 144, },
6546     {  106 + 17 ,  23 + 127, 116 + 62 , 152 + 53 , },
6547     {   26 + 90 ,  33 + 109,  27 + 142, 129 + 66 , },
6548     {   37 + 74 ,  88 + 47 ,  30 + 130,   5 + 180, },
6549     {   60 + 45 ,  91 + 37 , 139 + 13 ,  96 + 79 , },
6550     {   70 + 30 ,  14 + 108, 120 + 24 , 138 + 28 , },
6551     {   31 + 64 ,   8 + 108,  80 + 57 ,  77 + 81 , },
6552     {   78 + 12 ,  29 + 81 ,  23 + 107,   1 + 149, },
6553     {   26 + 59 ,  99 + 5  ,  19 + 104,  99 + 43 , },
6554     {   21 + 60 ,  61 + 38 ,   7 + 110,  15 + 120, },
6555     {   63 + 14 ,  64 + 30 ,  76 + 35 ,  30 + 98 , },
6556     {    0 + 73 ,  54 + 35 ,   8 + 97 ,  94 + 28 , },
6557     {   25 + 44 ,  61 + 24 ,  67 + 33 ,  84 + 32 , },
6558     {   50 + 16 ,  16 + 64 ,  27 + 68 , 108 + 2  , },
6559     {   54 + 8  ,  16 + 60 ,  24 + 66 ,  43 + 61 , },
6560     {    5 + 54 ,  46 + 26 ,  65 + 21 ,  93 + 6  , },
6561     {   50 + 6  ,  57 + 12 ,  42 + 39 ,  22 + 72 , },
6562     {   51 + 2  ,  24 + 41 ,  50 + 27 ,  81 + 8  , },
6563     {   46 + 5  ,  14 + 48 ,  55 + 18 ,  76 + 9  , },
6564     {   47 + 1  ,  21 + 38 ,  26 + 43 ,  17 + 63 , },
6565     {    7 + 39 ,  31 + 25 ,  58 + 8  ,  42 + 34 , },
6566     {   39 + 4  ,   7 + 46 ,  30 + 33 ,  20 + 52 , },
6567     {    5 + 36 ,  29 + 21 ,   1 + 58 ,  29 + 40 , },
6568     {   25 + 14 ,  47 + 1  ,  15 + 41 ,  12 + 53 , },
6569     {    2 + 35 ,  10 + 35 ,  45 + 9  ,  50 + 12 , },
6570     {    3 + 32 ,  36 + 7  ,  23 + 28 ,  11 + 48 , },
6571     {   11 + 22 ,  24 + 17 ,  31 + 17 ,  15 + 41 , },
6572     {    8 + 24 ,  19 + 20 ,  17 + 29 ,   2 + 51 , },
6573     {   28 + 2  ,  16 + 21 ,  40 + 3  ,  28 + 22 , },
6574     {   11 + 18 ,  34 + 1  ,  18 + 23 ,  17 + 31 , },
6575     {   12 + 15 ,  28 + 5  ,  20 + 19 ,  17 + 28 , },
6576     {    6 + 20 ,  15 + 16 ,  19 + 18 ,  12 + 31 , },
6577     {   19 + 5  ,  23 + 7  ,  31 + 4  ,  27 + 14 , },
6578     {    4 + 19 ,  25 + 3  ,  32 + 1  ,   0 + 39 , },
6579     {   10 + 12 ,  22 + 5  ,   8 + 24 ,  17 + 20 , },
6580     {   11 + 10 ,  25 + 1  ,   0 + 30 ,   4 + 31 , },
6581     {    2 + 18 ,   9 + 15 ,   0 + 29 ,   6 + 27 , },
6582     {   18 + 1  ,  11 + 12 ,   2 + 25 ,   0 + 31 , },
6583     {   12 + 6  ,  10 + 12 ,  17 + 9  ,  24 + 6  , },
6584     {    5 + 12 ,  18 + 3  ,   2 + 23 ,   6 + 22 , },
6585     {    1 + 15 ,   7 + 13 ,  19 + 4  ,  18 + 9  , },
6586     {    4 + 11 ,  12 + 7  ,  21 + 1  ,   5 + 20 , },
6587     {   12 + 2  ,   6 + 12 ,   5 + 16 ,  10 + 14 , },
6588     {    4 + 10 ,   7 + 10 ,  17 + 3  ,  17 + 6  , },
6589     {   10 + 3  ,   7 + 9  ,  14 + 5  ,  10 + 12 , },
6590     {    1 + 11 ,  14 + 1  ,   2 + 16 ,  10 + 11 , },
6591     {    2 + 10 ,  12 + 2  ,   6 + 11 ,   1 + 19 , },
6592     {    1 + 10 ,   3 + 11 ,   0 + 16 ,  18 + 1  , },
6593     {    3 + 8  ,   8 + 5  ,   1 + 14 ,  16 + 2  , },
6594     {    9 + 1  ,   9 + 3  ,   3 + 12 ,  14 + 3  , },
6595     {    1 + 9  ,   6 + 6  ,   9 + 5  ,   0 + 16 , },
6596     {    1 + 8  ,   6 + 5  ,  11 + 2  ,   6 + 9  , },
6597     {    2 + 7  ,  10 + 1  ,   3 + 9  ,   5 + 9  , },
6598     {    3 + 5  ,   4 + 6  ,   7 + 5  ,  12 + 2  , },
6599     {    4 + 4  ,   6 + 3  ,  10 + 1  ,   6 + 7  , },
6600     {    0 + 7  ,   3 + 6  ,   8 + 3  ,   2 + 10 , },
6601     {    6 + 1  ,   8 + 1  ,   7 + 3  ,   9 + 3  , },
6602     {    0 + 7  ,   6 + 2  ,   0 + 10 ,   9 + 2  , },
6603     {    0 + 6  ,   1 + 7  ,   5 + 4  ,   5 + 6  , },
6604     {    3 + 3  ,   1 + 6  ,   5 + 4  ,   8 + 2  , },
6605     {    0 + 6  ,   5 + 2  ,   4 + 4  ,   0 + 9  , },
6606     {    1 + 1  ,   0 + 2  ,   1 + 1  ,   0 + 2  , },
6607 };
6608 
6609 const mfxU8 transIdxLPS[64] =
6610 {
6611     55 - 55, 168 - 168, 0 + 1, 0 + 2, 1 + 1, 0 + 4, 1 + 3, 0 + 5, 1 + 5, 4 + 3, 4 + 4, 6 + 3, 8 + 1, 4 + 7, 9 + 2, 1 + 11,
6612     1 + 12, 9 + 4, 10 + 5, 9 + 6, 10 + 6, 14 + 2, 8 + 10, 0 + 18, 18 + 1, 18 + 1, 3 + 18, 10 + 11, 7 + 15, 10 + 12, 18 + 5, 16 + 8,
6613     18 + 6, 14 + 11, 5 + 21, 12 + 14, 25 + 2, 20 + 7, 21 + 7, 5 + 24, 26 + 3, 10 + 20, 21 + 9, 0 + 30, 11 + 20, 12 + 20, 14 + 18, 29 + 4,
6614     22 + 11, 13 + 20, 11 + 23, 33 + 1, 0 + 35, 24 + 11, 22 + 13, 26 + 10, 20 + 16, 35 + 1, 8 + 29, 13 + 24, 19 + 18, 5 + 33, 32 + 6, 32 + 31,
6615 };
6616 
6617 const mfxU8 transIdxMPS[64] =
6618 {
6619     0 + 1, 0 + 2, 2 + 1, 1 + 3, 0 + 5, 2 + 4, 0 + 7, 2 + 6, 3 + 6, 7 + 3, 6 + 5, 4 + 8, 2 + 11, 11 + 3, 10 + 5, 10 + 6,
6620     8 + 9, 14 + 4, 10 + 9, 5 + 15, 0 + 21, 15 + 7, 15 + 8, 4 + 20, 20 + 5, 15 + 11, 14 + 13, 23 + 5, 11 + 18, 17 + 13, 1 + 30, 13 + 19,
6621     17 + 16, 17 + 17, 27 + 8, 0 + 36, 0 + 37, 7 + 31, 25 + 14, 4 + 36, 22 + 19, 8 + 34, 9 + 34, 15 + 29, 16 + 29, 36 + 10, 37 + 10, 25 + 23,
6622     43 + 6, 12 + 38, 43 + 8, 5 + 47, 10 + 43, 25 + 29, 37 + 18, 27 + 29, 38 + 19, 15 + 43, 31 + 28, 24 + 36, 10 + 51, 54 + 8, 28 + 34, 59 + 4,
6623 };
6624 
CabacPackerSimple(mfxU8 * buf,mfxU8 * bufEnd,bool emulationControl)6625 CabacPackerSimple::CabacPackerSimple(mfxU8 * buf, mfxU8 * bufEnd, bool emulationControl)
6626 : OutputBitstream(buf, bufEnd, emulationControl)
6627 , m_codILow(0)
6628 , m_codIRange(510)
6629 , m_bitsOutstanding(0)
6630 , m_BinCountsInNALunits(0)
6631 , m_firstBitFlag(true)
6632 {
6633 }
6634 
PutBitC(mfxU32 B)6635 void CabacPackerSimple::PutBitC(mfxU32 B)
6636 {
6637     if (m_firstBitFlag)
6638         m_firstBitFlag = false;
6639     else
6640         PutBit(B);
6641 
6642     while (m_bitsOutstanding > 0)
6643     {
6644         PutBit(1-B);
6645         m_bitsOutstanding --;
6646     }
6647 }
6648 
RenormE()6649 void CabacPackerSimple::RenormE()
6650 {
6651     while (m_codIRange < 256)
6652     {
6653         if (m_codILow < 256)
6654         {
6655             PutBitC(0);
6656         }
6657         else if (m_codILow >= 512)
6658         {
6659             m_codILow -= 512;
6660             PutBitC(1);
6661         }
6662         else
6663         {
6664             m_codILow -= 256;
6665             m_bitsOutstanding ++;
6666         }
6667         m_codIRange <<= 1;
6668         m_codILow   <<= 1;
6669     }
6670 }
6671 
EncodeBin(mfxU8 * ctx,mfxU8 binVal)6672 void CabacPackerSimple::EncodeBin(mfxU8 * ctx, mfxU8 binVal)
6673 {
6674     mfxU8  pStateIdx = (*ctx) & 0x3F;
6675     mfxU8  valMPS    = ((*ctx) >> 6);
6676     mfxU32 qCodIRangeIdx = (m_codIRange >> 6) & 3;
6677     mfxU32 codIRangeLPS = rangeTabLPS[pStateIdx][qCodIRangeIdx];
6678 
6679     m_codIRange -= codIRangeLPS;
6680 
6681     if (binVal != valMPS)
6682     {
6683         m_codILow   += m_codIRange;
6684         m_codIRange  = codIRangeLPS;
6685 
6686         if (pStateIdx == 0)
6687             valMPS = 1 - valMPS;
6688 
6689         pStateIdx = transIdxLPS[pStateIdx];
6690     }
6691     else
6692     {
6693         pStateIdx = transIdxMPS[pStateIdx];
6694     }
6695     *ctx = ((valMPS<<6) | pStateIdx);
6696 
6697     RenormE();
6698     m_BinCountsInNALunits ++;
6699 }
6700 
TerminateEncode()6701 void CabacPackerSimple::TerminateEncode()
6702 {
6703     m_codIRange -= 2;
6704     m_codILow   += m_codIRange;
6705     m_codIRange = 2;
6706 
6707     RenormE();
6708     PutBitC((m_codILow >> 9) & 1);
6709     PutBit(m_codILow >> 8);
6710     PutTrailingBits();
6711 
6712     m_BinCountsInNALunits ++;
6713 }
6714 
6715 #endif // MFX_ENABLE_H264_VIDEO_ENCODE_HW
6716