1 // Copyright (c) 2018-2019 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20 
21 #include "mfxvideo.h"
22 
23 #if MFX_VERSION >= MFX_VERSION_NEXT
24 
25 #include "frame_processor.h"
26 #include "random_generator.h"
27 
28 
29 // Initialize parameters
Init(const InputParams & params)30 void FrameProcessor::Init(const InputParams &params)
31 {
32     m_CropH = params.m_height;
33     m_CropW = params.m_width;
34 
35     m_CTUStr = params.m_CTUStr;
36 
37     // align by CUSize
38     m_Height = MSDK_ALIGN(m_CropH, m_CTUStr.CTUSize);
39     m_Width  = MSDK_ALIGN(m_CropW, m_CTUStr.CTUSize);
40 
41     m_HeightInCTU = m_Height / m_CTUStr.CTUSize;
42     m_WidthInCTU  = m_Width  / m_CTUStr.CTUSize;
43 
44     m_ProcMode = params.m_ProcMode;
45 
46     m_SubPelMode = params.m_SubPixelMode;
47 
48     m_IsForceExtMVPBlockSize = params.m_bIsForceExtMVPBlockSize;
49     m_ForcedExtMVPBlockSize  = params.m_ForcedExtMVPBlockSize;
50     m_GenMVPBlockSize        = SetCorrectMVPBlockSize(params.m_GenMVPBlockSize);
51 }
52 
53 // Beginning of processing of current frame. Only MOD frames are processed
54 
ProcessFrame(FrameChangeDescriptor & frame_descr)55 void FrameProcessor::ProcessFrame(FrameChangeDescriptor & frame_descr)
56 {
57     try
58     {
59         switch (frame_descr.m_changeType)
60         {
61         case GEN:
62             return;
63             break;
64 
65         case MOD:
66             GenCTUParams(frame_descr);
67             GenAndApplyPrediction(frame_descr);
68             UnlockSurfaces(frame_descr);
69             break;
70 
71         case SKIP:
72             UnlockSurfaces(frame_descr);
73             break;
74 
75         default:
76             return;
77             break;
78         }
79     }
80     catch (std::string & e) {
81         std::cout << e << std::endl;
82         throw std::string("ERROR: FrameProcessor::ProcessFrame");
83     }
84     return;
85 }
86 
IsSampleAvailable(mfxU32 AdrX,mfxU32 AdrY)87 bool FrameProcessor::IsSampleAvailable(mfxU32 AdrX, mfxU32 AdrY)
88 {
89     return (AdrY < m_CropH && AdrX < m_CropW);
90 }
91 
92 // Only I420 color format are supported
GetSampleI420(COLOR_COMPONENT comp,mfxU32 AdrX,mfxU32 AdrY,mfxFrameSurface1 * surf)93 mfxU8 FrameProcessor::GetSampleI420(COLOR_COMPONENT comp, mfxU32 AdrX, mfxU32 AdrY, mfxFrameSurface1* surf)
94 {
95     if (surf == nullptr)
96     {
97         throw std::string("ERROR: GetSampleI420: null pointer reference");
98     }
99 
100     switch (comp)
101     {
102     case LUMA_Y:
103         return surf->Data.Y[AdrY * surf->Data.Pitch + AdrX];
104     case CHROMA_U:
105         return surf->Data.U[(AdrY / 2) * (surf->Data.Pitch / 2) + (AdrX / 2)];
106     case CHROMA_V:
107         return surf->Data.V[(AdrY / 2) * (surf->Data.Pitch / 2) + (AdrX / 2)];
108     default:
109         throw std::string("ERROR: Trying to get unspecified component");
110     }
111 }
112 
GetClippedSample(COLOR_COMPONENT comp,mfxI32 X,mfxI32 Y,mfxFrameSurface1 * surf)113 mfxI32 FrameProcessor::GetClippedSample(COLOR_COMPONENT comp, mfxI32 X, mfxI32 Y, mfxFrameSurface1 * surf)
114 {
115     mfxU32 clippedX = Clip3(X, 0, (mfxI32)m_CropW);
116     mfxU32 clippedY = Clip3(Y, 0, (mfxI32)m_CropH);
117 
118     return GetSampleI420(comp, clippedX, clippedY, surf);
119 }
120 
121 // These functions are to be used after applying CalculateLumaPredictionSamplePreWP
122 // In specification default weighted prediction is the final scaling step for sample prediction.
123 // predSampleL0 is an interpolated Luma sample value which is calculated in CalculateLumaPredictionSamplePreWP()
124 // Output is final scaled and rounded Luma value which returns in CalculateLumaPredictionSamplePreWP()
125 // See 8.5.3.3.4.2 "Default weighted sample prediction process" from 4.0 ITU-T H.265 (V4) 2016-12-22
GetDefaultWeightedPredSample(mfxI32 predSampleLx)126 mfxU8 FrameProcessor::GetDefaultWeightedPredSample(mfxI32 predSampleLx)
127 {
128     // These shift and offset variables are defined in H265 standard for 8 bit color depth
129     constexpr mfxU16 shift1 = 6;
130     constexpr mfxU16 offset1 = 1 << (shift1 - 1); // 2^5 = 32
131 
132     constexpr mfxI32 upperBound = (1 << 8) - 1; // 2^8 - 1 = 255
133     mfxI32 predSample = (predSampleLx + offset1) >> shift1;
134 
135     return mfxU8(Clip3(predSample, 0, upperBound));
136 }
137 
138 // Alternative in case working with B-frames
GetDefaultWeightedPredSample(mfxI32 predSampleL0,mfxI32 predSampleL1)139 mfxU8 FrameProcessor::GetDefaultWeightedPredSample(mfxI32 predSampleL0, mfxI32 predSampleL1)
140 {
141     // These shift and offset variables are defined in H265 standard for 8 bit color depth
142     mfxU16 shift2 = 7;
143     mfxU16 offset2 = 1 << (shift2 - 1); // 2^6 = 64
144 
145     mfxI32 upperBound = (1 << 8) - 1;   // 2^8 - 1 = 255
146 
147     mfxI32 predSample = (predSampleL0 + predSampleL1 + offset2) >> shift2;
148 
149     return mfxU8(Clip3(predSample, 0, upperBound));
150 }
151 
GenRandomQuadTreeStructure(QuadTree & QT,mfxU8 minDepth,mfxU8 maxDepth)152 void FrameProcessor::GenRandomQuadTreeStructure(QuadTree& QT, mfxU8 minDepth, mfxU8 maxDepth)
153 {
154     if (!QT.IsEmpty())
155     {
156         QT.Clear();
157     }
158 
159     GenRandomQuadTreeSubstrRecur(QT.root, minDepth, maxDepth);
160 }
161 
GenRandomQuadTreeSubstrRecur(QuadTreeNode & node,mfxU8 minDepth,mfxU8 maxDepth)162 void FrameProcessor::GenRandomQuadTreeSubstrRecur(QuadTreeNode& node, mfxU8 minDepth, mfxU8 maxDepth)
163 {
164     if (node.m_Level < minDepth ||
165         (node.m_Level < maxDepth && GetRandomGen().GetRandomBit()))
166     {
167         node.MakeChildren();
168         for (auto& child : node.m_Children)
169         {
170             GenRandomQuadTreeSubstrRecur(child, minDepth, maxDepth);
171         }
172     }
173     return;
174 }
175 
176 //Make a quad-tree structure inside ctu's QuadTree from FEI output
GenQuadTreeInCTUWithBitMask(CTUDescriptor & CTU,mfxU32 bitMask)177 void FrameProcessor::GenQuadTreeInCTUWithBitMask(CTUDescriptor& CTU, mfxU32 bitMask)
178 {
179     if (bitMask & 1)
180     {
181         CTU.m_CUQuadTree.root.MakeChildren();
182         GenQuadTreeWithBitMaskRecur(CTU.m_CUQuadTree.root, bitMask >> 1);
183     }
184 
185     return;
186 }
187 
GenQuadTreeWithBitMaskRecur(QuadTreeNode & node,mfxU32 bitMask)188 void FrameProcessor::GenQuadTreeWithBitMaskRecur(QuadTreeNode& node, mfxU32 bitMask)
189 {
190     for (mfxU32 i = 0; i < 4; i++)
191     {
192         if (bitMask & (1 << i))
193         {
194             node.m_Children[i].MakeChildren();
195 
196             if (node.m_Children[i].m_Level < 2)
197             {
198                 GenQuadTreeWithBitMaskRecur(node.m_Children[i], bitMask >> 4 * (i + 1));
199             }
200         }
201     }
202 
203     return;
204 }
205 
206 //Fills CU vector inside CTU with correct CU blocks in CTU quad-tree z-scan order
207 //For each CU, depending on the test type, its prediction mode is selected
208 //and further CU partitioning/mode selection is made (i.e. PU and TU partitioning
209 //and per TU intra mode selection)
GenCUVecInCTU(CTUDescriptor & ctu,mfxU16 testType)210 void FrameProcessor::GenCUVecInCTU(CTUDescriptor& ctu, mfxU16 testType)
211 {
212     QuadTree& QT = ctu.m_CUQuadTree;
213     std::vector<BaseBlock> tmpVec;
214     QT.GetQuadTreeBlocksRecur(QT.root, ctu.m_AdrX, ctu.m_AdrY, ctu.m_BHeight, tmpVec);
215 
216     for (auto& block : tmpVec)
217     {
218         CUBlock cu_block(block);
219 
220         if (testType & GENERATE_INTER && testType & GENERATE_INTRA)
221         {
222             cu_block.m_PredType = (GetRandomGen().GetRandomBit()) ?
223                 INTRA_PRED : INTER_PRED;
224         }
225         else if (testType & GENERATE_INTER)
226         {
227              cu_block.m_PredType = INTER_PRED;
228         }
229         else
230         {
231             cu_block.m_PredType = INTRA_PRED;
232         }
233 
234         if (cu_block.m_PredType == INTRA_PRED)
235         {
236             MakeIntraCU(cu_block);
237         }
238         else if (cu_block.m_PredType == INTER_PRED)
239         {
240             MakeInterCU(cu_block, testType);
241         }
242 
243         ctu.m_CUVec.push_back(cu_block);
244     }
245 }
246 
GetRefSampleAvailFlagsForTUsInCTU(CTUDescriptor & CTU)247 void FrameProcessor::GetRefSampleAvailFlagsForTUsInCTU(CTUDescriptor & CTU)
248 {
249     QuadTree& QT = CTU.m_CUQuadTree;
250     std::vector<RefSampleAvail> CURefSampleAvailVec;
251     //CTU is set with RefSamples available in both directions here.
252     //this should be made by guaranteeing that CTUs are far from each other
253     //(at least one initial CTU between two substituted CTUs in both directions)
254     QT.GetQuadTreeRefSampleAvailVector(QT.root, CTU, CTU, CURefSampleAvailVec);
255 
256     if (CURefSampleAvailVec.size() != CTU.m_CUVec.size())
257     {
258         throw std::string("ERROR: GetRefSampleAvailFlagsForTUsInCTU: mismatching CU and RefSampleAvail vector sizes");
259     }
260 
261     for (mfxU32 i = 0; i < CTU.m_CUVec.size(); i++)
262     {
263         CUBlock & CU = CTU.m_CUVec[i];
264         if (CU.m_PredType == INTRA_PRED)
265         {
266             QuadTree& RQT = CU.m_TUQuadTree;
267             std::vector<RefSampleAvail> TURefSampleAvailVec;
268 
269             RQT.GetQuadTreeRefSampleAvailVectorRecur(RQT.root, CU, CURefSampleAvailVec[i], CTU, TURefSampleAvailVec);
270             for (mfxU32 j = 0; j < CU.m_TUVec.size(); j++)
271             {
272                 TUBlock& TU = CU.m_TUVec[j];
273                 TU.m_RefSampleAvail = TURefSampleAvailVec[j];
274             }
275         }
276     }
277 }
278 
GenRandomTUQuadTreeInCU(CUBlock & cu_block)279 void FrameProcessor::GenRandomTUQuadTreeInCU(CUBlock& cu_block)
280 {
281     QuadTree& quadTreeTU = cu_block.m_TUQuadTree;
282     mfxU32 minTUDepth = std::max(0, mfxI32(CeilLog2(cu_block.m_BHeight) - m_CTUStr.maxLog2TUSize));
283     mfxU32 maxTUDepth = std::min(CeilLog2(cu_block.m_BHeight) - m_CTUStr.minLog2TUSize, m_CTUStr.maxTUQTDepth);
284 
285     GenRandomQuadTreeStructure(quadTreeTU, minTUDepth, maxTUDepth);
286 }
287 
288 
289 //Make a quad-tree structure inside ctu's QuadTree so that all CU blocks inside CTU
290 //have a size smaller than specified by maxLog2CUSize and larger than specified by minLog2CUSize
GenRandomCUQuadTreeInCTU(CTUDescriptor & ctu)291 void FrameProcessor::GenRandomCUQuadTreeInCTU(CTUDescriptor& ctu)
292 {
293     QuadTree& quadTreeCU = ctu.m_CUQuadTree;
294     mfxU32 minCUDepth = std::max(0, mfxI32(CeilLog2(ctu.m_BHeight) - m_CTUStr.maxLog2CUSize));
295     mfxU32 maxCUDepth = CeilLog2(ctu.m_BHeight) - m_CTUStr.minLog2CUSize;
296     GenRandomQuadTreeStructure(quadTreeCU, minCUDepth, maxCUDepth);
297 }
298 
//TODO: not implemented yet.
//Stub: both arguments are ignored and false is always returned, so the caller
//(ChooseContrastIntraMode) never takes its AlterBorderSamples branch.
//Presumably intended to tell whether the samples of frame covered by block are uniform - confirm
bool FrameProcessor::IsBlockUniform(const BaseBlock& block, PatchBlock& frame)
{
    return false;
}
304 
//TODO: not implemented yet.
//Stub: both arguments are ignored and nothing is modified.
//Presumably intended to change the border samples of block inside frame - confirm
void FrameProcessor::AlterBorderSamples(const BaseBlock& block, PatchBlock& frame)
{

}
310 
ChooseContrastIntraMode(const BaseBlock & block,std::vector<TUBlock> & block_vec,PatchBlock & frame)311 void FrameProcessor::ChooseContrastIntraMode(const BaseBlock& block, std::vector<TUBlock>& block_vec, PatchBlock& frame)
312 {
313     if (IsBlockUniform(block, frame))
314     {
315         AlterBorderSamples(block, frame);
316     }
317 
318     //distance between initial block and intra predicted patch should be maximized
319     mfxU32 maxDist = 0; //max num of diff that can be reached is 32x32x256 = 2^18
320     //initial block filled with samples from the frame
321     PatchBlock refBlock(block, frame);
322     //here Patch with max distance from the initBlock will be stored
323     PatchBlock maxDistPatch(block);
324     //mode corresponding to maxDistPatch
325     INTRA_MODE maxDistMode = PLANAR;
326 
327     //choose TUs inside block
328     std::vector<TUBlock> TUInCurrBlock;
329     for (auto& TU : block_vec)
330     {
331         if (TU.IsInBlock(block))
332         {
333             TUInCurrBlock.emplace_back(TU);
334         }
335     }
336 
337     //check whether there is TU inside block with no left-down or up-right refSamples available
338     //if so, we can't use corresponding modes and we limit maxModeAvail minModeAvail
339     // and minAngModeAvail in appropriate manner
340     //if left-down samples aren't available, modes 2 - 9 are prohibited
341     //if up-right samples aren't available, modes 27 - 34 are prohibited
342     //in both cases planar mode is prohibited because it uses samples p[-1][N] and p[N][-1]
343     //in coordinates relative to the block, where N is the size of block
344     //see HEVC algorighms and structures page 101(112)
345     //Vivienne Sze Madhukar Budagavi Gary J.Sullivan High Efficiency Video Coding(HEVC) Algorithms and Architectures 2014
346 
347     mfxU32 minModeAvail    = INTRA_MODE::PLANAR;
348     mfxU32 minAngModeAvail = INTRA_MODE::ANG2;
349     mfxU32 maxModeAvail    = INTRA_MODE::ANG34;
350     for (auto& TU : TUInCurrBlock)
351     {
352         if (!TU.m_RefSampleAvail.LeftDown)
353         {
354             minAngModeAvail = INTRA_MODE::ANG10_HOR;
355             minModeAvail    = INTRA_MODE::DC;
356         }
357         if (!TU.m_RefSampleAvail.UpRight)
358         {
359             maxModeAvail = INTRA_MODE::ANG26_VER;
360             minModeAvail = INTRA_MODE::DC;
361         }
362     }
363 
364     for (mfxU32 i = minModeAvail; i <= maxModeAvail; i++)
365     {
366         if (i < INTRA_MODE::ANG2 || i >= minAngModeAvail)
367         {
368             //iterate over TUs of current CU and save predicted TUs into frame
369             for (auto& TU : TUInCurrBlock)
370             {
371                 TU.m_IntraModeChroma = TU.m_IntraModeLuma = INTRA_MODE(i);
372                 MakeTUIntraPrediction(TU, frame);
373             }
374             //get curPatch from frame
375             PatchBlock curPatch(block, frame);
376             //count distance between initial block and current patch
377             mfxU32 curDist = refBlock.CalcYSAD(curPatch);
378             //save patch which is the farthest from the initial CU block
379             if (curDist > maxDist)
380             {
381                 maxDist = curDist;
382                 maxDistPatch = curPatch;
383                 maxDistMode = INTRA_MODE(i);
384             }
385         }
386     }
387 
388     for (auto& TU : block_vec)
389     {
390         if (TU.IsInBlock(block))
391         {
392             //for now chroma intra mode is set equal to luma intra mode
393             TU.m_IntraModeChroma = TU.m_IntraModeLuma = maxDistMode;
394         }
395     }
396 
397     frame.InsertAnotherPatch(maxDistPatch);
398 
399     return;
400 }
401 
402 //Generates a quad-tree TU structure inside the cu_block and fills the TU block vector
403 //inside cu_block with correct TU blocks corresponding to the CU quad-tree TU structure
404 //Selects an intra mode for each generated TU
MakeIntraCU(CUBlock & cu_block)405 void FrameProcessor::MakeIntraCU(CUBlock& cu_block)
406 {
407     QuadTree& QT = cu_block.m_TUQuadTree;
408     GenRandomTUQuadTreeInCU(cu_block);
409 
410     std::vector<BaseBlock> tmpVec;
411     QT.GetQuadTreeBlocksRecur(QT.root, cu_block.m_AdrX, cu_block.m_AdrY, cu_block.m_BHeight, tmpVec);
412     cu_block.m_TUVec.clear();
413     for (auto& block : tmpVec)
414     {
415         cu_block.m_TUVec.emplace_back(TUBlock(block, PLANAR, PLANAR));
416     }
417 
418     mfxU32 minCUSize = 1 << m_CTUStr.minLog2CUSize;
419     //Special case for CUs of size equal to minCUSize:
420     //we can choose intra mode for every quarter
421     //see last paragraph on p.228(238) in HEVC Algorithms and Architectures
422     if (cu_block.m_BHeight == minCUSize && cu_block.m_BWidth == minCUSize
423         && tmpVec.size() >= 4 && GetRandomGen().GetRandomBit())
424     {
425         cu_block.m_IntraPartMode = INTRA_NxN;
426     }
427     else
428     {
429         cu_block.m_IntraPartMode = INTRA_2Nx2N;
430     }
431 }
432 
MakeIntraPredInCTU(CTUDescriptor & ctu,FrameChangeDescriptor & descr)433 void FrameProcessor::MakeIntraPredInCTU(CTUDescriptor& ctu, FrameChangeDescriptor & descr)
434 {
435     ExtendedSurface& surf = *descr.m_frame;
436     //save frame data in temporary patchBlock
437     PatchBlock framePatchBlock(BaseBlock(0, 0, surf.Info.CropW, surf.Info.CropH), surf);
438     for (auto& cu : ctu.m_CUVec)
439     {
440         if (cu.m_PredType == INTRA_PRED)
441         {
442             if (cu.m_IntraPartMode == INTRA_NxN)
443             {
444                 std::vector<BaseBlock>  childrenBlocks;
445                 cu.GetChildBlock(childrenBlocks);
446                 for (mfxU32 i = 0; i < 4; i++)
447                 {
448                     //choose the most contrast intra mode for every quarter of CU
449                     ChooseContrastIntraMode(childrenBlocks[i], cu.m_TUVec, framePatchBlock);
450                 }
451             }
452             else
453             {
454                 ChooseContrastIntraMode(cu, cu.m_TUVec, framePatchBlock);
455             }
456         }
457     }
458 }
459 
460 //Chooses the inter partitioning mode for the CU and fills the PU vector inside it with PUs
461 //corresponding to the chosen mode
MakeInterCU(CUBlock & cu_block,mfxU16 testType)462 void FrameProcessor::MakeInterCU(CUBlock& cu_block, mfxU16  testType)
463 {
464     INTER_PART_MODE mode  = INTER_PART_MODE::INTER_NONE;
465 
466     mfxU32 max_mode_num = -1;
467 
468     if (!(testType & GENERATE_SPLIT) || !m_CTUStr.bCUToPUSplit)
469     {
470         mode = INTER_PART_MODE::INTER_2Nx2N; //If no split is specified or CU to PU split is forbidden,
471                                              //the CU will contain a single PU
472     }
473     else
474     {
475         if (cu_block.m_BHeight == 8 && cu_block.m_BWidth == 8)
476         {
477             //Minimum PU size is 4x8 or 8x4, which means that only first 3 inter partitioning
478             //modes are available for 8x8 CU
479             //Condition when only symmetric modes are supported in case of 8x8 CU is satisfied by default
480             max_mode_num = INTER_8x8CU_PART_MODE_NUM - 1;
481         }
482         else if (m_CTUStr.bForceSymmetricPU)
483         {
484             //No check for 8x8 CU case here. It is already processed
485             max_mode_num = INTER_SYMM_PART_MODE_NUM - 1;
486         }
487         else
488         {
489             max_mode_num = INTER_PART_MODE_NUM - 1;
490         }
491 
492         bool isCUMinSized = (CeilLog2(cu_block.m_BHeight) == m_CTUStr.minLog2CUSize);
493         // CU split into 4 square PUs is only allowed for the CUs
494         // of the smallest size (see p.61-62 of doi:10.1007/978-3-319-06895-4)
495         do
496         {
497             mode = (INTER_PART_MODE) GetRandomGen().GetRandomNumber(0, max_mode_num);
498         }
499         while (!isCUMinSized && mode == INTER_PART_MODE::INTER_NxN);
500 
501     }
502 
503     cu_block.BuildPUsVector(mode);
504     cu_block.m_InterPartMode = mode;
505 }
506 
CeilLog2(mfxU32 size)507 mfxU8 FrameProcessor::CeilLog2(mfxU32 size)
508 {
509     mfxU8 ret = 0;
510     while (size > 1)
511     {
512         size /= 2;
513         ret++;
514     }
515     return ret;
516 }
517 
518 // First round of generation: choose CTU on current MOD frame
519 //
520 // FrameChangeDescriptor & frameDescr - descriptor of current MOD frame
521 
GenCTUParams(FrameChangeDescriptor & frame_descr)522 void FrameProcessor::GenCTUParams(FrameChangeDescriptor & frame_descr)
523 {
524     mfxI32 maxAttempt = 100;
525 
526     // Try no more than maxAttempt times to generate no more than m_CTUStr.CTUMaxNum CTUs
527     for (mfxI32 i = 0; i < maxAttempt && frame_descr.m_vCTUdescr.size() < m_CTUStr.CTUMaxNum; ++i)
528     {
529         // TODO: this part could be improved with aggregate initialization
530         CTUDescriptor tempCTUDsc;
531 
532         // Do not choose last CTU in row/column to avoid effects of alignment
533         // m_WidthInCTU-1 is a coordinate of last CTU in row
534         tempCTUDsc.m_AdrXInCTU = GetRandomGen().GetRandomNumber(0, m_WidthInCTU  - 2);
535         tempCTUDsc.m_AdrYInCTU = GetRandomGen().GetRandomNumber(0, m_HeightInCTU - 2);
536 
537         // Calculate pixel coordinates and size
538         tempCTUDsc.m_AdrX = tempCTUDsc.m_AdrXInCTU * m_CTUStr.CTUSize;
539         tempCTUDsc.m_AdrY = tempCTUDsc.m_AdrYInCTU * m_CTUStr.CTUSize;
540         tempCTUDsc.m_BWidth = tempCTUDsc.m_BHeight = m_CTUStr.CTUSize;
541 
542         // Checks if current CTU intersects with or is too close to any of already generated
543         auto it = find_if(frame_descr.m_vCTUdescr.begin(), frame_descr.m_vCTUdescr.end(),
544             [&](const CTUDescriptor& dscr){ return dscr.CheckForIntersect(tempCTUDsc, m_CTUStr.CTUSize * m_CTUStr.CTUDist, m_CTUStr.CTUSize * m_CTUStr.CTUDist); });
545 
546         if (it == frame_descr.m_vCTUdescr.end())
547         {
548             // If no intersection, put generated block to vector
549             frame_descr.m_vCTUdescr.push_back(std::move(tempCTUDsc));
550         }
551     }
552 
553     return;
554 }
555 
556 // Second round of test: Generate partitions and MVs. Write down pixels to MOD and all reference GEN frames
557 // Remove a CTU generated in the previous round from the test set
558 // if it is impossible to place it without intersecting other CTUs
559 //
560 // FrameChangeDescriptor & frameDescr - descriptor of current MOD frame
561 
GenAndApplyPrediction(FrameChangeDescriptor & frameDescr)562 void FrameProcessor::GenAndApplyPrediction(FrameChangeDescriptor & frameDescr)
563 {
564     // Iterate over all generated in GenCTUParams CTUs (see round 1)
565     auto it_ctu = frameDescr.m_vCTUdescr.begin();
566 
567     if (!(frameDescr.m_testType & GENERATE_SPLIT))
568     {
569         //If no split is specified, set min/max CU size to CTU size
570         //so that the CTU quad-tree only contains single node after generation
571         m_CTUStr.minLog2CUSize = CeilLog2(m_CTUStr.CTUSize);
572         m_CTUStr.maxLog2CUSize = CeilLog2(m_CTUStr.CTUSize);
573     }
574 
575 
576     while (it_ctu != frameDescr.m_vCTUdescr.end())
577     {
578         auto &CTU = *it_ctu;
579 
580         //make a tree and save CUs into the vector inside CTU
581         GenRandomCUQuadTreeInCTU(CTU);
582         GenCUVecInCTU(CTU, frameDescr.m_testType);
583         if (frameDescr.m_testType & GENERATE_INTRA)
584         {
585             GetRefSampleAvailFlagsForTUsInCTU(CTU);
586         }
587 
588         FrameOccRefBlockRecord bak = frameDescr.BackupOccupiedRefBlocks();
589 
590         bool bMVGenSuccess = true;
591         if (frameDescr.m_testType & GENERATE_INTER)
592         {
593             bMVGenSuccess = MakeInterPredInCTU(CTU, frameDescr);
594         }
595         if (bMVGenSuccess)
596         {
597             //Inter prediction must be applied first
598             //because intra blocks should use noise pixels from
599             //adjacent inter CUs and not unchanged picture pixels
600             //in the same spot
601             ApplyInterPredInCTU(CTU, frameDescr);
602 
603             //most contrast intra mode is chosen here
604             MakeIntraPredInCTU(CTU, frameDescr);
605             ApplyIntraPredInCTU(CTU, frameDescr);
606             it_ctu++;
607         }
608         else
609         {
610             //Unable to put the CTU into reference frames without intersection
611             //Restore backup reference block info:
612             frameDescr.RestoreOccupiedRefBlocks(bak);
613 
614             //Remove current CTU from the test block list and updating the iterator
615             it_ctu = frameDescr.m_vCTUdescr.erase(it_ctu);
616         }
617     }
618 
619     return;
620 }
621 
622 //Generates MV and MVP for all PUs in CTU
MakeInterPredInCTU(CTUDescriptor & CTU,FrameChangeDescriptor & frameDescr)623 bool FrameProcessor::MakeInterPredInCTU(CTUDescriptor& CTU, FrameChangeDescriptor& frameDescr)
624 {
625     // First, need to construct an MVP grid and vector pools according to CTU partioning
626     MVMVPProcessor mvmvpProcessor(m_GenMVPBlockSize, m_SubPelMode);
627 
628     if (frameDescr.m_testType & GENERATE_PREDICTION)
629     {
630         mvmvpProcessor.InitMVPGridData(CTU, frameDescr);
631     }
632 
633     for (auto& CU : CTU.m_CUVec)
634     {
635         if (CU.m_PredType == INTER_PRED)
636         {
637             for (auto& PU : CU.m_PUVec)
638             {
639                 // Set prediction flags for each PU
640                 GenPredFlagsForPU(PU, frameDescr.m_frameType);
641 
642                 bool bMVGenSuccess = false;
643                 if (frameDescr.m_testType & GENERATE_PREDICTION)
644                 {
645                     bMVGenSuccess = mvmvpProcessor.GenValidMVMVPForPU(PU, frameDescr);
646                 }
647                 else
648                 {
649                     bMVGenSuccess = mvmvpProcessor.GenValidMVForPU(PU, frameDescr);
650                 }
651 
652                 if (bMVGenSuccess)
653                 {
654                     // Store the shifted PU as a BaseBlock in the corresponding reference frame descriptor
655                     if (PU.predFlagL0)
656                     {
657                         auto itL0 = std::next(frameDescr.m_refDescrList0.begin(), PU.m_MV.RefIdx.RefL0);
658                         itL0->m_OccupiedRefBlocks.emplace_back(PU.GetShiftedBaseBlock(L0));
659                     }
660 
661                     if (PU.predFlagL1)
662                     {
663                         auto itL1 = std::next(frameDescr.m_refDescrList1.begin(), PU.m_MV.RefIdx.RefL1);
664                         itL1->m_OccupiedRefBlocks.emplace_back(PU.GetShiftedBaseBlock(L1));
665                     }
666                 }
667                 else
668                 {
669                     //MV prediction has failed; need to discard the whole CTU
670                     return false;
671                 }
672             }
673         }
674     }
675 
676     //MVs for all PUs in CTU have been generated
677 
678     // If predictors required, put them to ext buffer
679     if (frameDescr.m_testType & GENERATE_PREDICTION && frameDescr.m_procMode == GENERATE)
680     {
681         //Only output predictors if CTU contains at least one inter CU
682         auto it = std::find_if(CTU.m_CUVec.begin(), CTU.m_CUVec.end(),
683             [](const CUBlock& CU) { return CU.m_PredType == INTER_PRED; });
684 
685         if (it != CTU.m_CUVec.end())
686         {
687             mvmvpProcessor.FillFrameMVPExtBuffer(frameDescr);
688         }
689     }
690 
691     if ((frameDescr.m_testType & GENERATE_PREDICTION) && frameDescr.m_procMode == VERIFY)
692     {
693         mvmvpProcessor.GetMVPPools(CTU.m_MVPGenPools);
694     }
695 
696     return true;
697 }
698 
699 
GenPredFlagsForPU(PUBlock & PU,mfxU16 frameType)700 void FrameProcessor::GenPredFlagsForPU(PUBlock & PU, mfxU16 frameType)
701 {
702     PU.predFlagL0 = true; // Set prediction flag for P-frames
703     if (frameType & MFX_FRAMETYPE_B)
704     {
705         // Each PU requires 1 or more reference
706         mfxI32 maxValue = 2;
707         if (PU.m_BWidth == 4 || PU.m_BHeight == 4)
708             maxValue = 1;  // For PUs 8x4 or 4x8 we can use only unidirectional prediction
709 
710         switch (GetRandomGen().GetRandomNumber(0, maxValue))
711         {
712         case 0:
713             PU.predFlagL0 = true;
714             PU.predFlagL1 = false;
715             break;
716         case 1:
717             PU.predFlagL0 = false;
718             PU.predFlagL1 = true;
719             break;
720         case 2:
721             PU.predFlagL0 = true;
722             PU.predFlagL1 = true;
723             break;
724         }
725     }
726 
727     return;
728 }
729 
730 //Iterates over CUs in CTU and applies inter prediction for inter CUs
ApplyInterPredInCTU(CTUDescriptor & CTU,FrameChangeDescriptor & frameDescr)731 void FrameProcessor::ApplyInterPredInCTU(CTUDescriptor& CTU, FrameChangeDescriptor & frameDescr)
732 {
733     for (auto& CU : CTU.m_CUVec)
734     {
735         if (CU.m_PredType == INTER_PRED)
736         {
737             for (auto& PU : CU.m_PUVec)
738             {
739                 // Advance to reference descriptor of desired frame
740                 // NB: using reverse iterator here because RefIdx starts counting from the end of the list
741 
742                 // Generate noisy PU in GEN frames pixels
743                 // This function should be called in both (generate and verify) modes, because includes work with random generator
744                 PutNoiseBlocksIntoFrames(PU, frameDescr);
745 
746                 // Trace back those pixels (using q-pel interpolation if required) and put to current MOD frame
747                 // This function should be called only in the generate mode
748                 if (m_ProcMode == GENERATE)
749                 {
750                     TraceBackAndPutBlockIntoFrame(PU, frameDescr);
751                 }
752             }
753         }
754     }
755 }
756 
757 // Generate and put noisy pixels to surface surf, which corresponds to block BP coordinates
758 
759 // Fill the block with 4x4 noise blocks
PutNoiseBlocksIntoFrames(const PUBlock & PU,const FrameChangeDescriptor & frameDescr,mfxU32 num_coeff,mfxU32 level)760 void FrameProcessor::PutNoiseBlocksIntoFrames(const PUBlock & PU, const FrameChangeDescriptor & frameDescr, mfxU32 num_coeff, mfxU32 level)
761 {
762     BaseBlock BPL0, BPL1;
763 
764     mfxFrameSurface1* refSurfL0 = nullptr;
765     mfxFrameSurface1* refSurfL1 = nullptr;
766 
767     if (PU.predFlagL0)
768     {
769         if (frameDescr.m_refDescrList0.size() <= PU.m_MV.RefIdx.RefL0)
770         {
771             throw std::string("ERROR: PutNoiseBlockIntoFrames: incorrect reference index for list 0");
772         }
773         auto &refDescrL0 = *next(frameDescr.m_refDescrList0.begin(), PU.m_MV.RefIdx.RefL0);
774 
775         // Get frame of reference GEN frame
776         refSurfL0 = refDescrL0.m_frame;
777         if (refSurfL0 == nullptr)
778         {
779             throw std::string("ERROR: PutNoiseBlockIntoFrames: null pointer reference");
780         }
781 
782         BPL0 = PU.GetShiftedBaseBlock(L0);
783     }
784 
785     if (PU.predFlagL1)
786     {
787         if (frameDescr.m_refDescrList1.size() <= PU.m_MV.RefIdx.RefL1)
788         {
789             throw std::string("ERROR: PutNoiseBlockIntoFrames: incorrect reference index for list 1");
790         }
791         auto &refDescrL1 = *next(frameDescr.m_refDescrList1.begin(), PU.m_MV.RefIdx.RefL1);
792 
793         // Get frame of reference GEN frame
794         refSurfL1 = refDescrL1.m_frame;
795         if (refSurfL1 == nullptr)
796         {
797             throw std::string("ERROR: PutNoiseBlockIntoFrames: null pointer reference");
798         }
799 
800         BPL1 = PU.GetShiftedBaseBlock(L1);
801     }
802 
803     //Check that PU can be subdivided evenly into 4x4 blocks
804     if (PU.m_BWidth % 4 || PU.m_BHeight % 4)
805     {
806         throw std::string("ERROR: PutNoiseBlockIntoFrame: invalid block size");
807     }
808     mfxU32 Block4x4NumX = PU.m_AdrX / 4;
809     mfxU32 Block4x4NumY = PU.m_AdrY / 4;
810     mfxU32 FrameWidthIn4x4Blocks = (m_Width + 3) / 4;
811     mfxU32 FrameHeightIn4x4Blocks = (m_Height + 3) / 4;
812 
813     mfxU32 seedOff = frameDescr.m_frame->Data.FrameOrder * FrameWidthIn4x4Blocks * FrameHeightIn4x4Blocks;
814     mfxU32 seed = 0;
815 
816     mfxU32 BPWidthIn4x4Blocks = PU.m_BWidth / 4;
817     mfxU32 BPHeightIn4x4Blocks = PU.m_BHeight / 4;
818     mfxU8 block[16] = {}, blockAdjusted[16] = {};
819     mfxI8 blockDeltaL0[16] = {}, blockDeltaL1[16] = {};
820 
821     for (mfxU32 i = 0; i < BPHeightIn4x4Blocks; i++)
822     {
823         for (mfxU32 j = 0; j < BPWidthIn4x4Blocks; j++)
824         {
825             //Calculate seed from the 4x4 block position inside the PU
826             seed = seedOff + FrameWidthIn4x4Blocks * (Block4x4NumY + i) + Block4x4NumX + j;
827             GetRandomGen().SeedGenerator(seed);
828 
829             //Calculate noise pixel values
830             FillInBlock4x4(num_coeff, level, block);
831 
832             //Calculate delta for difference between L0 and L1 references
833             if (PU.predFlagL0 && PU.predFlagL1)
834                 FillDeltaBlocks4x4(blockDeltaL0, blockDeltaL1);
835 
836             if (m_ProcMode == GENERATE)
837             {
838                 if (PU.predFlagL0)
839                 {
840                     ApplyDeltaPerPixel(PU, blockDeltaL0, block, blockAdjusted);
841                     PutBlock4x4(BPL0.m_AdrX + j * 4, BPL0.m_AdrY + i * 4, blockAdjusted, refSurfL0);
842                 }
843 
844                 if (PU.predFlagL1)
845                 {
846                     ApplyDeltaPerPixel(PU, blockDeltaL1, block, blockAdjusted);
847                     PutBlock4x4(BPL1.m_AdrX + j * 4, BPL1.m_AdrY + i * 4, blockAdjusted, refSurfL1);
848                 }
849             }
850         }
851     }
852 }
853 
854 // Fill block[16] with noise pixels, using up to num_coeff first random DCT coefficients
855 // in the range of (-level; +level)
FillInBlock4x4(mfxU32 num_coeff,mfxU32 level,mfxU8 block[16])856 void FrameProcessor::FillInBlock4x4(mfxU32 num_coeff, mfxU32 level, mfxU8 block[16])
857 {
858     if (block == nullptr)
859     {
860         throw std::string("ERROR: FillInBlock4x4: null pointer reference");
861     }
862 
863     if (num_coeff < 1 || num_coeff > 16)
864     {
865         throw std::string("\nERROR: Wrong num_coeff in FrameProcessor::FillInBlock4x4");
866     }
867     if (level > 255)
868     {
869         throw std::string("\nERROR: Wrong level in FrameProcessor::FillInBlock4x4");
870     }
871 
872     mfxI32 coeff[16];
873     mfxI32 pixels[16];
874     mfxU32 scan[16] = { 0,1,4,8,5,2,3,6,9,12,13,10,7,11,14,15 };
875     memset(coeff, 0, sizeof(coeff));
876     for (mfxU32 i = 0; i < num_coeff; i++)
877     {
878         coeff[scan[i]] = level* GetRandomGen().GetRandomNumber(0, 256) / 256 - level / 2;
879     }
880     if (m_ProcMode == GENERATE)
881     {
882         Inverse4x4(coeff, 4, pixels, 4);
883         for (mfxU32 i = 0; i < 16; i++)
884         {
885             block[i] = ClipIntToChar(128 + pixels[i]);
886         }
887     }
888 
889     return;
890 }
891 
FillDeltaBlocks4x4(mfxI8 blockL0[16],mfxI8 blockL1[16])892 void FrameProcessor::FillDeltaBlocks4x4(mfxI8 blockL0[16], mfxI8 blockL1[16])
893 {
894     if (blockL0 == nullptr || blockL1 == nullptr)
895     {
896         throw std::string("ERROR: FillDeltaBlocks4x4: null pointer reference");
897     }
898 
899     for (mfxU32 i = 0; i < 16; i++)
900     {
901         blockL0[i] = GetRandomGen().GetRandomNumber(0, DELTA_PIXEL_BI_DIRECT);
902         blockL1[i] = -blockL0[i];
903     }
904 
905 
906     return;
907 }
908 
ApplyDeltaPerPixel(const PUBlock & PU,const mfxI8 deltaBlock[16],const mfxU8 inBlock[16],mfxU8 outBlock[16])909 void FrameProcessor::ApplyDeltaPerPixel(const PUBlock & PU, const mfxI8 deltaBlock[16], const mfxU8 inBlock[16], mfxU8 outBlock[16])
910 {
911     if (inBlock == nullptr || outBlock == nullptr)
912     {
913         throw std::string("ERROR: ApplyDeltaPerPixel: null pointer reference");
914     }
915 
916     for (mfxU8 i = 0; i < 16; i++)
917     {
918         outBlock[i] = ClipIntToChar(inBlock[i] + deltaBlock[i]);
919     }
920 
921     return;
922 }
923 
ClipIntToChar(mfxI32 x)924 mfxU8 FrameProcessor::ClipIntToChar(mfxI32 x)
925 {
926     if (x < 0)
927         return 0;
928     else if (x > 255)
929         return 255;
930 
931     return (mfxU8)x;
932 }
933 
934 //Perform inverse 4x4 DCT
Inverse4x4(mfxI32 * src,mfxU32 s_pitch,mfxI32 * dst,mfxU32 d_pitch)935 void FrameProcessor::Inverse4x4(mfxI32 *src, mfxU32 s_pitch, mfxI32 *dst, mfxU32 d_pitch)
936 {
937     if (src == nullptr || dst == nullptr)
938     {
939         throw std::string("ERROR: Inverse4x4: null pointer reference");
940     }
941     const mfxU32 BLOCK_SIZE = 4;
942     mfxI32 tmp[16];
943     mfxI32 *pTmp = tmp, *pblock;
944     mfxI32 p0, p1, p2, p3;
945     mfxI32 t0, t1, t2, t3;
946 
947     // Horizontal
948     for (mfxU32 i = 0; i < BLOCK_SIZE; i++)
949     {
950         pblock = src + i*s_pitch;
951         t0 = *(pblock++);
952         t1 = *(pblock++);
953         t2 = *(pblock++);
954         t3 = *(pblock);
955 
956         p0 = t0 + t2;
957         p1 = t0 - t2;
958         p2 = (t1 >> 1) - t3;
959         p3 = t1 + (t3 >> 1);
960 
961         *(pTmp++) = p0 + p3;
962         *(pTmp++) = p1 + p2;
963         *(pTmp++) = p1 - p2;
964         *(pTmp++) = p0 - p3;
965     }
966 
967     //  Vertical
968     for (mfxU32 i = 0; i < BLOCK_SIZE; i++)
969     {
970         pTmp = tmp + i;
971         t0 = *pTmp;
972         t1 = *(pTmp += BLOCK_SIZE);
973         t2 = *(pTmp += BLOCK_SIZE);
974         t3 = *(pTmp += BLOCK_SIZE);
975 
976         p0 = t0 + t2;
977         p1 = t0 - t2;
978         p2 = (t1 >> 1) - t3;
979         p3 = t1 + (t3 >> 1);
980 
981         *(dst + 0 * d_pitch + i) = p0 + p3;
982         *(dst + 1 * d_pitch + i) = p1 + p2;
983         *(dst + 2 * d_pitch + i) = p1 - p2;
984         *(dst + 3 * d_pitch + i) = p0 - p3;
985     }
986 }
987 
PutBlock4x4(mfxU32 x0,mfxU32 y0,mfxU8 block[16],mfxFrameSurface1 * surf)988 void FrameProcessor::PutBlock4x4(mfxU32 x0, mfxU32 y0, mfxU8 block[16], mfxFrameSurface1* surf)
989 {
990     if (surf == nullptr)
991     {
992         throw std::string("ERROR: PutBlock4x4: null pointer reference");
993     }
994     //put block in the current frame, x0, y0 pixel coordinates
995     for (mfxU32 y = 0; y < 4; y++)
996     {
997         for (mfxU32 x = 0; x < 4; x++)
998         {
999             *(surf->Data.Y + (y0 + y)* surf->Data.Pitch  + x0 + x) = block[4 * y + x];
1000             *(surf->Data.U + ((y0 + y) / 2) * (surf->Data.Pitch / 2) + (x0 + x) / 2) = CHROMA_DEFAULT;
1001             *(surf->Data.V + ((y0 + y) / 2) * (surf->Data.Pitch / 2) + (x0 + x) / 2) = CHROMA_DEFAULT;
1002         }
1003     }
1004     return;
1005 }
1006 
1007 
1008 // Trace back pixels from block BP shifted by MV coordinates on GEN frame surf_from to MOD frame surf_to
1009 //
1010 // bp - block on MOD frame
1011 // mv - it's shift on GEN frame
1012 // surf_from - GEN frame surf
1013 // surf_to - MOD frame surf
1014 
TraceBackAndPutBlockIntoFrame(const PUBlock & PU,FrameChangeDescriptor & descr)1015 void FrameProcessor::TraceBackAndPutBlockIntoFrame(const PUBlock & PU, FrameChangeDescriptor & descr)
1016 {
1017     std::pair<mfxU32, mfxU32> fractOffsetL0(PU.m_MV.MV[0].x & 3, PU.m_MV.MV[0].y & 3);
1018     std::pair<mfxU32, mfxU32> fractOffsetL1(PU.m_MV.MV[1].x & 3, PU.m_MV.MV[1].y & 3);
1019 
1020     auto itL0 = std::next(descr.m_refDescrList0.begin(), PU.m_MV.RefIdx.RefL0);
1021     auto itL1 = std::next(descr.m_refDescrList1.begin(), PU.m_MV.RefIdx.RefL1);
1022 
1023     mfxFrameSurface1* surfDest = nullptr;
1024     mfxFrameSurface1* surfL0Ref = nullptr;
1025     mfxFrameSurface1* surfL1Ref = nullptr;
1026 
1027     surfDest = descr.m_frame;
1028 
1029     if (itL0 != descr.m_refDescrList0.end())
1030     {
1031         surfL0Ref = itL0->m_frame;
1032     }
1033     else
1034     {
1035         throw("ERROR: TraceBackAndPutBlockIntoFrame: L0 ref not found");
1036     }
1037 
1038     if (descr.m_frameType & MFX_FRAMETYPE_B)
1039     {
1040         if (itL1 != descr.m_refDescrList1.end())
1041         {
1042             surfL1Ref = itL1->m_frame;
1043         }
1044         else
1045         {
1046             throw("ERROR: TraceBackAndPutBlockIntoFrame: L1 ref not found");
1047         }
1048     }
1049 
1050     PatchBlock outPatch(PU);
1051     InterpolWorkBlock workBlockL0;
1052     InterpolWorkBlock workBlockL1;
1053 
1054     if (PU.predFlagL0 && PU.predFlagL1)
1055     {
1056         workBlockL0 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L0), fractOffsetL0, surfL0Ref);
1057         workBlockL1 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L1), fractOffsetL1, surfL1Ref);
1058         outPatch = ApplyDefaultWeightedPrediction(workBlockL0, workBlockL1);
1059     }
1060     else if (PU.predFlagL0)
1061     {
1062         workBlockL0 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L0), fractOffsetL0, surfL0Ref);
1063         outPatch = ApplyDefaultWeightedPrediction(workBlockL0);
1064     }
1065     else if (PU.predFlagL1)
1066     {
1067         workBlockL1 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L1), fractOffsetL1, surfL1Ref);
1068         outPatch = ApplyDefaultWeightedPrediction(workBlockL1);
1069     }
1070     else
1071         throw("ERROR: TraceBackAndPutBlockIntoFrame: predFlagL0 and predFlagL1 are equal 0");
1072 
1073     //Adjust outPatch coords so that they correspond to the unshifted PU
1074     outPatch.m_AdrX = PU.m_AdrX;
1075     outPatch.m_AdrY = PU.m_AdrY;
1076 
1077     PutPatchIntoFrame(outPatch, *surfDest);
1078 
1079     return;
1080 }
1081 
1082 
GetInterpolWorkBlockPreWP(const BaseBlock & blockFrom,std::pair<mfxU32,mfxU32> fractOffset,mfxFrameSurface1 * surfFrom)1083 InterpolWorkBlock FrameProcessor::GetInterpolWorkBlockPreWP(const BaseBlock & blockFrom, std::pair<mfxU32, mfxU32> fractOffset, mfxFrameSurface1* surfFrom)
1084 {
1085     if (surfFrom == nullptr)
1086     {
1087         throw std::string("ERROR: GetInterpolWorkBlockPreWP: null pointer reference");
1088     }
1089     InterpolWorkBlock workBlock(blockFrom);
1090 
1091     //Luma
1092     for (mfxU32 i = 0; i < blockFrom.m_BHeight; i++)
1093     {
1094         for (mfxU32 j = 0; j < blockFrom.m_BWidth; j++)
1095         {
1096             mfxU32 offset = i * blockFrom.m_BWidth + j;
1097             workBlock.m_YArr[offset] = CalculateLumaPredictionSamplePreWP(
1098                 std::make_pair(blockFrom.m_AdrX + j, blockFrom.m_AdrY + i), fractOffset, surfFrom);
1099         }
1100     }
1101 
1102     // Chroma(YV12 / I420 only) - TODO: enable correct interpolation for chroma
1103     //NB: MFX_FOURCC_YV12 is an umbrella designation for both YV12 and I420 here, as
1104     //the process of copying pixel values in memory is the same
1105 
1106     //TODO: implement proper chroma interpolation
1107     if (surfFrom->Info.FourCC == MFX_FOURCC_YV12)
1108     {
1109         for (mfxU32 i = 0; i < blockFrom.m_BHeight / 2; ++i)
1110         {
1111             for (mfxU32 j = 0; j < blockFrom.m_BWidth / 2; j++)
1112             {
1113                 mfxU32 offsetChr = i * blockFrom.m_BWidth / 2 + j;
1114                 workBlock.m_UArr[offsetChr] = surfFrom->Data.U[
1115                     (blockFrom.m_AdrY / 2 + i) * surfFrom->Data.Pitch / 2 + blockFrom.m_AdrX / 2 + j];
1116                 workBlock.m_VArr[offsetChr] = surfFrom->Data.V[
1117                     (blockFrom.m_AdrY / 2 + i) * surfFrom->Data.Pitch / 2 + blockFrom.m_AdrX / 2 + j];
1118 
1119                 //For now, just scale uninterpolated chroma so that we can call default weighted prediction
1120                 //on chroma components the same way we do with luma
1121                 workBlock.m_UArr[offsetChr] <<= 6;
1122                 workBlock.m_VArr[offsetChr] <<= 6;
1123             }
1124         }
1125     }
1126 
1127     return workBlock;
1128 }
1129 
ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockLx)1130 PatchBlock FrameProcessor::ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockLx)
1131 {
1132     PatchBlock outPatch(static_cast<BaseBlock>(workBlockLx));
1133 
1134     //Luma
1135     for (mfxU32 i = 0; i < outPatch.m_BHeight * outPatch.m_BWidth; i++)
1136     {
1137         outPatch.m_YPlane[i] = GetDefaultWeightedPredSample(workBlockLx.m_YArr[i]);
1138     }
1139 
1140     //Chroma
1141     for (mfxU32 i = 0; i < outPatch.m_BHeight / 2 * outPatch.m_BWidth / 2; i++)
1142     {
1143         outPatch.m_UPlane[i] = GetDefaultWeightedPredSample(workBlockLx.m_UArr[i]);
1144         outPatch.m_VPlane[i] = GetDefaultWeightedPredSample(workBlockLx.m_VArr[i]);
1145     }
1146     return outPatch;
1147 }
1148 
ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockL0,InterpolWorkBlock & workBlockL1)1149 PatchBlock FrameProcessor::ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockL0, InterpolWorkBlock & workBlockL1)
1150 {
1151     if (workBlockL0.m_BHeight != workBlockL1.m_BHeight || workBlockL0.m_BWidth != workBlockL1.m_BWidth)
1152     {
1153         throw std::string("ERROR: ApplyDefaultWeightedPrediction: InterpolWorkBlocks for bi-prediction must have same size");
1154     }
1155     PatchBlock outPatch(static_cast<BaseBlock>(workBlockL0));
1156     for (mfxU32 i = 0; i < outPatch.m_BHeight * outPatch.m_BWidth; i++)
1157     {
1158         outPatch.m_YPlane[i] = GetDefaultWeightedPredSample(workBlockL0.m_YArr[i], workBlockL1.m_YArr[i]);
1159     }
1160 
1161     for (mfxU32 i = 0; i < outPatch.m_BHeight / 2 * outPatch.m_BWidth / 2; i++)
1162     {
1163         outPatch.m_UPlane[i] = GetDefaultWeightedPredSample(workBlockL0.m_UArr[i], workBlockL1.m_UArr[i]);
1164         outPatch.m_VPlane[i] = GetDefaultWeightedPredSample(workBlockL0.m_VArr[i], workBlockL1.m_VArr[i]);
1165      }
1166 
1167     return outPatch;
1168 }
1169 
SetCorrectMVPBlockSize(mfxU8 mvpBlockSizeParam)1170 mfxU8 FrameProcessor::SetCorrectMVPBlockSize(mfxU8 mvpBlockSizeParam)
1171 {
1172     if (!mvpBlockSizeParam)
1173     {
1174         switch (m_CTUStr.CTUSize)
1175         {
1176         case 16:
1177             return 1;
1178         case 32:
1179             return 2;
1180         case 64:
1181             return 3;
1182         default:
1183             break;
1184         }
1185     }
1186     return mvpBlockSizeParam;
1187 }
1188 
1189 // Returns predicted luma value (Y) for sample with provided location on reference frame given in quarter-pixel units
1190 //
1191 // refSamplePositionFull - (xInt,yInt) Luma location on the reference frame given in full-sample units. Assumed (x,y) has correct value.
1192 // refSamplePositionFract - (xFract,yFract) Luma location on the reference frame given in quarter-sample units.
1193 // refSurface - reference frame, containing luma samples
1194 // Luma interpolation process described in H265 standard (p.163 - 165)
1195 
CalculateLumaPredictionSamplePreWP(const std::pair<mfxU32,mfxU32> & refSamplePositionFull,const std::pair<mfxU32,mfxU32> & refSamplePositionFract,mfxFrameSurface1 * refSurface)1196 mfxI32 FrameProcessor::CalculateLumaPredictionSamplePreWP(const std::pair<mfxU32, mfxU32>& refSamplePositionFull,
1197     const std::pair<mfxU32, mfxU32>& refSamplePositionFract, mfxFrameSurface1 * refSurface)
1198 {
1199     mfxU32 xFull = refSamplePositionFull.first;
1200     mfxU32 yFull = refSamplePositionFull.second;
1201     mfxU32 xFract = refSamplePositionFract.first;
1202     mfxU32 yFract = refSamplePositionFract.second;
1203 
1204     // These shift variables used below are specified in H265 spec for 8 bit Luma depth
1205     // shift1 := 0
1206     // shift2 := 6
1207     // shift3 := 6
1208 
1209     // Stores output of the sub-sample filtering process
1210     mfxI32 interpolatedSample = 0;
1211 
1212     /*
1213     // Integer and quarter sample positions used for interpolation
1214 
1215     A-10 O O O | A00 a00 b00 c00 | A10 O O O A20
1216     d-10 O O O | d00 e00 f00 g00 | d10 O O O d20
1217     h-10 O O O | h00 i00 j00 k00 | h10 O O O h20
1218     n-10 O O O | n00 p00 q00 r00 | n10 O O O n20
1219     */
1220 
1221     switch (xFract)
1222     {
1223     case 0:
1224         switch (yFract)
1225         {
1226         case 0:
1227             // A << shift3
1228             interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1229                 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[0]) * 64;
1230             break;
1231         case 1:
1232             // d00 := (-A(0,-3) + 4*A(0,-2) - 10*A(0,-1) + 58*A(0,0) + 17*A(0,1) - 5*A(0,2) + A(0,3)) >> shift1
1233             interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1234                 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[1]);
1235             break;
1236         case 2:
1237             // h00 := (-A(0,-3) + 4*A(0,-2) - 11*A(0,-1) + 40*A(0,0) + 40*A(0,1) - 11*A(0,2) + 4*A(0,3) - A(0,4)) >> shift1
1238             interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1239                 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[2]);
1240             break;
1241         case 3:
1242             // n00 := (A(0,-2) - 5*A(0,-1) + 17*A(0,0) + 58*A(0,1) - 10*A(0,2) + 4*A(0,3) - A(0,4)) >> shift1
1243             interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1244                 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[3]);
1245             break;
1246         default:
1247             break;
1248         }
1249         break;
1250     case 1:
1251     {
1252         // a0i, where i = -3..4
1253         // a0i = [a(0,-3) a(0,-2) a(0,-1) a(0,0) a(0,1) a(0,2) a(0,3) a(0,4)]
1254         std::vector<mfxI32> fractUtilSamples;
1255         fractUtilSamples.reserve(LUMA_TAPS_NUMBER);
1256 
1257         for (mfxI32 i = 0; i < LUMA_TAPS_NUMBER; i++)
1258         {
1259             // a0i := (-A(-3,i) + 4*A(-2,i) - 10*A(-1,i) + 58*A(0,i) + 17*A(1,i) - 5*A(2,i) + A(3,i) >> shift1)
1260             fractUtilSamples.push_back(ApplyHorizontalSubSampleLumaFilter(xFull,
1261                 yFull + (i - 3), refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[1]));
1262         }
1263 
1264         switch (yFract)
1265         {
1266         case 0:
1267             // a00 := a(0,0)
1268             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1269                 LUMA_SUBSAMPLE_FILTER_COEFF[0], 0);
1270             break;
1271         case 1:
1272             // e00 := (-a(0,-3) + 4*a(0,-2) - 10*a(0,-1) + 58*a(0,0) + 17*a(0,1) - 5*a(0,2) + a(0,3) >> shift2)
1273             interpolatedSample = std::inner_product(fractUtilSamples.begin(),
1274                 fractUtilSamples.end(), LUMA_SUBSAMPLE_FILTER_COEFF[1], 0) / 64;
1275             break;
1276         case 2:
1277             // i00 := (-a(0,-3) + 4*a(0,-2) - 11*a(0,-1) + 40*a(0,0) + 40*a(0,1) - 11*a(0,2) + 4*a(0,3) - a(0,4) >> shift2)
1278             interpolatedSample = std::inner_product(fractUtilSamples.begin(),
1279                 fractUtilSamples.end(), LUMA_SUBSAMPLE_FILTER_COEFF[2], 0) / 64;
1280             break;
1281         case 3:
1282             // p00 := (a(0,-2) - 5*a(0,-1) + 17*a(0,0) + 58*a(0,1) - 10*a(0,2) + 4*a(0,3) - a(0,4) >> shift2)
1283             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1284                 LUMA_SUBSAMPLE_FILTER_COEFF[3], 0) / 64;
1285             break;
1286         default:
1287             break;
1288         }
1289         break;
1290     }
1291     case 2:
1292     {
1293         // b0i, where i = -3..4
1294         // b0i = [b(0,-3) b(0,-2) b(0,-1) b(0,0) b(0,1) b(0,2) b(0,3) b(0,4)]
1295         std::vector<mfxI32> fractUtilSamples;
1296         fractUtilSamples.reserve(LUMA_TAPS_NUMBER);
1297 
1298         for (mfxI32 i = 0; i < LUMA_TAPS_NUMBER; i++)
1299         {
1300             // b0i := (-A(-3,i) + 4*A(-2,i) - 11*A(-1,i) + 40*A(0,i) + 40*A(1,i) - 11*A(2,i) + 4*A(3,i) - A(4,i) >> shift1)
1301             fractUtilSamples.push_back(ApplyHorizontalSubSampleLumaFilter(xFull, yFull + (i - 3),
1302                 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[2]));
1303         }
1304 
1305         switch (yFract)
1306         {
1307         case 0:
1308             // b00 := (-A(-3,0) + 4*A(-2,0) - 11*A(-1,0) + 40*A(0,0) + 40*A(1,0) - 11*A(2,0) + 4*A(3,0) - A(4,0) >> shift1)
1309             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1310                 LUMA_SUBSAMPLE_FILTER_COEFF[0], 0);
1311             break;
1312         case 1:
1313             // f00 := (-b(0,-3) + 4*b(0,-2) - 10*b(0,-1) + 58*b(0,0) + 17*b(0,1) - 5*b(0,2) + b(0,3) >> shift2)
1314             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1315                 LUMA_SUBSAMPLE_FILTER_COEFF[1], 0) / 64;
1316             break;
1317         case 2:
1318             // j00 := (-b(0,-3) + 4*b(0,-2) - 11*b(0,-1) + 40*b(0,0) + 40*b(0,1) - 11*b(0,2) + 4*b(0,3) - b(0,4) >> shift2)
1319             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1320                 LUMA_SUBSAMPLE_FILTER_COEFF[2], 0) / 64;
1321             break;
1322         case 3:
1323             // q00 := (b(0,-2) - 5*b(0,-1) + 17*b(0,0) + 58*b(0,1) - 10*b(0,2) + 4*b(0,3) - b(0,4) >> shift2)
1324             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1325                 LUMA_SUBSAMPLE_FILTER_COEFF[3], 0) / 64;
1326             break;
1327         default:
1328             break;
1329         }
1330         break;
1331     }
1332     case 3:
1333     {
1334         // c0i, where i = -3..4
1335         // c0i = [c(0,-3) c(0,-2) c(0,-1) c(0,0) c(0,1) c(0,2) c(0,3) c(0,4)]
1336         std::vector<mfxI32> fractUtilSamples;
1337         fractUtilSamples.reserve(LUMA_TAPS_NUMBER);
1338 
1339         for (mfxI32 i = 0; i < LUMA_TAPS_NUMBER; i++)
1340         {
1341             // c0i := (A(-2,i) - 5*A(-1,i) + 17*A(0,i) + 58*A(1,i) - 10*A(2,i) + 4*A(3,i) - A(4,i) >> shift1)
1342             fractUtilSamples.push_back(ApplyHorizontalSubSampleLumaFilter(xFull, yFull + (i - 3),
1343                 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[3]));
1344         }
1345 
1346         switch (yFract)
1347         {
1348         case 0:
1349             // c00 := c(0,0)
1350             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1351                 LUMA_SUBSAMPLE_FILTER_COEFF[0], 0);
1352             break;
1353         case 1:
1354             // g00 := (-c(0,-3) + 4*c(0,-2) - 10*c(0,-1) + 58*c(0,0) + 17*c(0,1) - 5*c(0,2) + c(0,3) >> shift2)
1355             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1356                 LUMA_SUBSAMPLE_FILTER_COEFF[1], 0) / 64;
1357             break;
1358         case 2:
1359             // k00 := (-c(0,-3) + 4*c(0,-2) - 11*c(0,-1) + 40*c(0,0) + 40*c(0,1) - 11*c(0,2) + 4*c(0,3) - c(0,4) >> shift2)
1360             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1361                 LUMA_SUBSAMPLE_FILTER_COEFF[2], 0) / 64;
1362             break;
1363         case 3:
1364             // r00 := (c(0,-2) - 5*c(0,-1) + 17*c(0,0) + 58*c(0,1) - 10*c(0,2) + 4*c(0,3) - c(0,4) >> shift2)
1365             interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1366                 LUMA_SUBSAMPLE_FILTER_COEFF[3], 0) / 64;
1367             break;
1368         default:
1369             break;
1370         }
1371         break;
1372     }
1373     default:
1374         break;
1375     }
1376 
1377     return interpolatedSample;
1378 }
1379 
ApplyVerticalSubSampleLumaFilter(mfxU32 x,mfxU32 y,mfxFrameSurface1 * refSurface,const mfxI32 * coeff)1380 mfxI32 FrameProcessor::ApplyVerticalSubSampleLumaFilter(mfxU32 x, mfxU32 y, mfxFrameSurface1 * refSurface, const mfxI32 * coeff)
1381 {
1382     mfxI32 sum = 0;
1383 
1384     for (mfxI32 i = -3; i <= 4; i++)
1385     {
1386         sum += coeff[i + 3] * GetClippedSample(COLOR_COMPONENT::LUMA_Y, (mfxI32)x, (mfxI32)y + i, refSurface);
1387     }
1388 
1389     return sum;
1390 }
1391 
ApplyHorizontalSubSampleLumaFilter(mfxU32 x,mfxU32 y,mfxFrameSurface1 * refSurface,const mfxI32 * coeff)1392 mfxI32 FrameProcessor::ApplyHorizontalSubSampleLumaFilter(mfxU32 x, mfxU32 y, mfxFrameSurface1 * refSurface, const mfxI32 * coeff)
1393 {
1394     mfxI32 sum = 0;
1395 
1396     for (mfxI32 i = -3; i <= 4; i++)
1397     {
1398         sum += coeff[i + 3] * GetClippedSample(COLOR_COMPONENT::LUMA_Y, (mfxI32)x + i, (mfxI32)y, refSurface);
1399     }
1400 
1401     return sum;
1402 }
1403 
1404 // Release processed surfaces
1405 
UnlockSurfaces(FrameChangeDescriptor & frame_descr)1406 void FrameProcessor::UnlockSurfaces(FrameChangeDescriptor & frame_descr)
1407 {
1408     frame_descr.m_frame->Data.Locked = 0;
1409 
1410     // Unlock all reference GEN frames recursively
1411     if (frame_descr.m_refDescrList0.empty() && frame_descr.m_refDescrList1.empty()) return;
1412     else
1413     {
1414         for (auto & ref_descr : frame_descr.m_refDescrList0)
1415         {
1416             UnlockSurfaces(ref_descr);
1417         }
1418         for (auto & ref_descr : frame_descr.m_refDescrList1)
1419         {
1420             UnlockSurfaces(ref_descr);
1421         }
1422     }
1423 
1424     return;
1425 }
1426 
1427 //this function has the same behavior for any color component
1428 //it is more convenient to have input in the coordinates of colorComp component space
FillIntraRefSamples(mfxU32 cSize,mfxU32 cAdrX,mfxU32 cAdrY,const PatchBlock & frame,COLOR_COMPONENT colorComp,std::vector<mfxU8> & refSamples)1429 void FrameProcessor::FillIntraRefSamples(mfxU32 cSize, mfxU32 cAdrX, mfxU32 cAdrY, const PatchBlock& frame, COLOR_COMPONENT colorComp, std::vector<mfxU8>& refSamples)
1430 {
1431     refSamples.clear();
1432     const mfxU32 NO_SAMPLES_AVAILABLE = 0xffffffff;
1433     mfxU8 prevSampleAvail = 128; //default ref sample value is 128 if no real ref samples are available
1434     mfxU32 firstSampleAvailPos = NO_SAMPLES_AVAILABLE; //position of the first available ref sample in refSamples
1435 
1436     //fill vertical part
1437     mfxU32 currCAdrX = cAdrX - 1;
1438     mfxU32 currCAdrY = cAdrY + 2 * cSize - 1;
1439 
1440     for (mfxU32 i = 0; i < 2 * cSize + 1; i++, currCAdrY--)
1441     {
1442         if (IsSampleAvailable(currCAdrX, currCAdrY))
1443         {
1444             prevSampleAvail = frame.GetSampleI420(colorComp, currCAdrX, currCAdrY);
1445             refSamples.push_back(prevSampleAvail);
1446             if (firstSampleAvailPos == NO_SAMPLES_AVAILABLE)
1447             {
1448                 firstSampleAvailPos = i;
1449             }
1450         }
1451         else
1452         {
1453             refSamples.push_back(prevSampleAvail);
1454         }
1455     }
1456     currCAdrX = cAdrX;
1457     currCAdrY = cAdrY - 1;
1458 
1459     //fill horizontal part
1460     for (mfxU32 i = 2 * cSize + 1; i < 4 * cSize + 1; i++, currCAdrX++)
1461     {
1462         if (IsSampleAvailable(currCAdrX, currCAdrY))
1463         {
1464             prevSampleAvail = frame.GetSampleI420(colorComp, currCAdrX, currCAdrY);
1465             refSamples.push_back(prevSampleAvail);
1466             if (firstSampleAvailPos == NO_SAMPLES_AVAILABLE)
1467             {
1468                 firstSampleAvailPos = i;
1469             }
1470         }
1471         else
1472         {
1473             refSamples.push_back(prevSampleAvail);
1474         }
1475     }
1476     //fill initial part with with first available ref sample value
1477     if (firstSampleAvailPos != NO_SAMPLES_AVAILABLE)
1478     {
1479         std::fill(refSamples.begin(), refSamples.begin() + firstSampleAvailPos, refSamples[firstSampleAvailPos]);
1480     }
1481 }
1482 
ChooseFilter(std::vector<mfxU8> & RefSamples,mfxU8 size,INTRA_MODE mode)1483 FILTER_TYPE FrameProcessor::ChooseFilter(std::vector<mfxU8>& RefSamples, mfxU8 size, INTRA_MODE mode) {
1484     FILTER_TYPE filter = NO_FILTER;
1485     if (mode == DC || size == 4)
1486         return filter;
1487     switch (size) {
1488     case 8:
1489         if (mode == 2 || mode == 18 || mode == 34)
1490             filter = THREE_TAP_FILTER;
1491         break;
1492     case 16:
1493         filter = THREE_TAP_FILTER;
1494         if ((mode > 8 && mode < 12) || (mode > 24 && mode < 28))
1495             filter = NO_FILTER;
1496         break;
1497     case 32:
1498         filter = THREE_TAP_FILTER;
1499         if (mode == ANG10_HOR || mode == ANG26_VER)
1500             filter = NO_FILTER;
1501         else if (std::abs(RefSamples[0] + RefSamples[2 * size] - 2 * RefSamples[size]) < 8 &&
1502             std::abs(RefSamples[2 * size] + RefSamples[4 * size] - 2 * RefSamples[3 * size]) < 8)
1503             filter = STRONG_INTRA_SMOOTHING_FILTER;
1504         break;
1505     default:
1506         break;
1507     }
1508     return filter;
1509 }
1510 
ThreeTapFilter(std::vector<mfxU8> & RefSamples,mfxU8 size)1511 void FrameProcessor::ThreeTapFilter(std::vector<mfxU8>& RefSamples, mfxU8 size) {
1512     for (mfxU8 i = 1; i < (size << 2); i++)
1513         RefSamples[i] = (RefSamples[i - 1] + 2 * RefSamples[i] + RefSamples[i + 1] + 2) >> 2;
1514 }
1515 
StrongFilter(std::vector<mfxU8> & RefSamples,mfxU8 size)1516 void FrameProcessor::StrongFilter(std::vector<mfxU8>& RefSamples, mfxU8 size) {
1517     for (mfxU8 i = 1; i < 2 * size; i++)
1518         RefSamples[i] = (i * RefSamples[2 * size] + (2 * size - i) * RefSamples[0] + 32) >> 6;
1519     for (mfxU8 i = 1; i < 2 * size; i++)
1520         RefSamples[2 * size + i] = ((2 * size - i) * RefSamples[2 * size] + i * RefSamples[4 * size] + 32) >> 6;
1521 }
1522 
MakeFilter(std::vector<mfxU8> & RefSamples,mfxU8 size,INTRA_MODE mode)1523 FILTER_TYPE FrameProcessor::MakeFilter(std::vector<mfxU8>& RefSamples, mfxU8 size, INTRA_MODE mode) {
1524     FILTER_TYPE filter = ChooseFilter(RefSamples, size, mode);
1525     switch (filter) {
1526     case NO_FILTER:
1527         break;
1528     case THREE_TAP_FILTER:
1529         ThreeTapFilter(RefSamples, size);
1530         break;
1531     case STRONG_INTRA_SMOOTHING_FILTER:
1532         StrongFilter(RefSamples, size);
1533         break;
1534     default:
1535         break;
1536     }
1537     return filter;
1538 }
1539 
MakeProjRefArray(const std::vector<mfxU8> & RefSamples,mfxU8 size,const IntraParams & IntraMode,std::vector<mfxU8> & ProjRefSamples)1540 mfxU8 FrameProcessor::MakeProjRefArray(const std::vector<mfxU8>& RefSamples, mfxU8 size, const IntraParams& IntraMode, std::vector<mfxU8>& ProjRefSamples)
1541 {
1542     mfxU8 NumProj = 0;
1543 
1544     if (IntraMode.direction == HORIZONTAL)
1545     {
1546         ProjRefSamples.insert(ProjRefSamples.end(), RefSamples.begin(), RefSamples.begin() + 2 * size + 1);
1547         if (IntraMode.intraPredAngle < 0)
1548         {
1549             if (IntraMode.invAngle == 0)
1550             {
1551                 throw std::string("ERROR: MakeProjRefArray: invAngle == 0 for angular mode with intraPredAngle < 0");
1552             }
1553             mfxI8 y = -1;
1554             mfxI32 sampleForProjectionPos = 2 * size + ((y * IntraMode.invAngle + 128) >> 8);
1555             while (sampleForProjectionPos < 4 * size + 1)
1556             {
1557                 ProjRefSamples.push_back(RefSamples[sampleForProjectionPos]);
1558                 sampleForProjectionPos = 2 * size + ((--y * IntraMode.invAngle + 128) >> 8);
1559             }
1560         }
1561         std::reverse(ProjRefSamples.begin(), ProjRefSamples.end());
1562         NumProj = (mfxU8)(ProjRefSamples.size() - 2 * size - 1);
1563     }
1564     else if (IntraMode.direction == VERTICAL)
1565     {
1566         if (IntraMode.intraPredAngle < 0)
1567         {
1568             if (IntraMode.invAngle == 0)
1569             {
1570                 throw std::string("ERROR: MakeProjRefArray: invAngle == 0 for angular mode with intraPredAngle < 0");
1571             }
1572             mfxI8 x = -1;
1573             mfxI32 sampleForProjectionPos = 2 * size - ((x * IntraMode.invAngle + 128) >> 8);
1574 
1575             while (sampleForProjectionPos > -1)
1576             {
1577                 ProjRefSamples.push_back(RefSamples[sampleForProjectionPos]);
1578                 sampleForProjectionPos = 2 * size - ((--x * IntraMode.invAngle + 128) >> 8);
1579             }
1580 
1581             std::reverse(ProjRefSamples.begin(), ProjRefSamples.end());
1582         }
1583 
1584         NumProj = (mfxU8)ProjRefSamples.size();
1585         ProjRefSamples.insert(ProjRefSamples.end(), RefSamples.begin() + 2 * size, RefSamples.end());
1586     }
1587     return NumProj;
1588 }
1589 
PlanarPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 size,mfxU8 * patch)1590 void FrameProcessor::PlanarPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 size, mfxU8 * patch)
1591 {
1592     if (patch == nullptr)
1593     {
1594         throw std::string("ERROR: PlanarPrediction: pointer to buffer is null\n");
1595     }
1596 
1597     for (mfxI32 y = 0; y < size; y++)
1598     {
1599         for (mfxI32 x = 0; x < size; x++)
1600         {
1601             patch[y * size + x] = (
1602                 (size - 1 - x) * RefSamples[2 * size - 1 - y]
1603                 + (x + 1) * RefSamples[3 * size + 1]
1604                 + (size - 1 - y) * RefSamples[2 * size + 1 + x]
1605                 + (y + 1) * RefSamples[size - 1]
1606                 + size) / (size * 2);
1607         }
1608     }
1609 
1610 }
1611 
DCPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 size,mfxU8 * patch)1612 void FrameProcessor::DCPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 size, mfxU8 * patch)
1613 {
1614     if (patch == nullptr)
1615     {
1616         throw std::string("ERROR: DCPrediction: pointer to buffer is null\n");
1617     }
1618 
1619     mfxU32 DCValue = size;
1620     for (mfxI32 i = 0; i < size; i++)
1621     {
1622         DCValue += RefSamples[2 * size - 1 - i] + RefSamples[i + 2 * size + 1];
1623     }
1624     DCValue /= 2 * size;
1625     memset(patch, DCValue, size*size);
1626 }
1627 
AngularPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 size,IntraParams & params,mfxU8 * patch)1628 void FrameProcessor::AngularPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 size, IntraParams& params, mfxU8 * patch) {
1629     if (patch == nullptr)
1630     {
1631         throw std::string("ERROR: AngularPrediction: pointer to buffer is null\n");
1632     }
1633 
1634     std::vector<mfxU8> ProjRefSamples;
1635     mfxU8 NumProj = MakeProjRefArray(RefSamples, size, params, ProjRefSamples);
1636     if (params.direction == HORIZONTAL)
1637         for (mfxI32 y = 0; y < size; y++)
1638             for (mfxI32 x = 0; x < size; x++) {
1639                 mfxI32 f = ((x + 1) * params.intraPredAngle) & 31;
1640                 mfxI32 i = ((x + 1) * params.intraPredAngle) >> 5;
1641                 if (f != 0)
1642                     patch[y * size + x] = ((32 - f) * ProjRefSamples[y + i + 1 + NumProj] + f * ProjRefSamples[y + i + 2 + NumProj] + 16) >> 5;
1643                 else
1644                     patch[y * size + x] = ProjRefSamples[y + i + 1 + NumProj];
1645             }
1646     else
1647         for (mfxI32 y = 0; y < size; y++)
1648             for (mfxI32 x = 0; x < size; x++) {
1649                 mfxI32 f = ((y + 1) * params.intraPredAngle) & 31;
1650                 mfxI32 i = ((y + 1) * params.intraPredAngle) >> 5;
1651                 if (f != 0)
1652                     patch[y * size + x] = ((32 - f) * ProjRefSamples[x + i + 1 + NumProj] + f * ProjRefSamples[x + i + 2 + NumProj] + 16) >> 5;
1653                 else
1654                     patch[y * size + x] = ProjRefSamples[x + i + 1 + NumProj];
1655             }
1656 
1657     return;
1658 }
1659 
GenerateIntraPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 blockSize,INTRA_MODE currMode,mfxU8 * currPlane)1660 void FrameProcessor::GenerateIntraPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 blockSize, INTRA_MODE currMode, mfxU8* currPlane)
1661 {
1662     if (currPlane == nullptr)
1663     {
1664         throw std::string("ERROR: GenerateIntraPrediction: pointer to buffer is null\n");
1665     }
1666 
1667     IntraParams params(currMode);
1668 
1669     switch (params.intraMode)
1670     {
1671     case PLANAR:
1672         PlanarPrediction(RefSamples, blockSize, currPlane);
1673         break;
1674     case DC:
1675         DCPrediction(RefSamples, blockSize, currPlane);
1676         break;
1677     default:
1678         AngularPrediction(RefSamples, blockSize, params, currPlane);
1679         break;
1680     }
1681     return;
1682 }
1683 
MakePostFilter(const std::vector<mfxU8> & RefSamples,mfxU8 size,INTRA_MODE currMode,mfxU8 * lumaPlane)1684 void FrameProcessor::MakePostFilter(const std::vector<mfxU8>& RefSamples, mfxU8 size, INTRA_MODE currMode, mfxU8* lumaPlane)
1685 {
1686     mfxU32 DCValue = lumaPlane[0];
1687 
1688     switch (currMode)
1689     {
1690     case DC:
1691         lumaPlane[0] = (RefSamples[2 * size - 1] + 2 * DCValue + RefSamples[2 * size + 1] + 2) >> 2;
1692         for (mfxI32 x = 1; x < size; x++)
1693         {
1694             lumaPlane[x] = (RefSamples[2 * size + 1 + x] + 3 * DCValue + 2) >> 2;
1695         }
1696         for (mfxI32 y = 1; y < size; y++)
1697         {
1698             lumaPlane[y * size] = (RefSamples[2 * size - 1 - y] + 3 * DCValue + 2) >> 2;
1699         }
1700         break;
1701     case ANG10_HOR:
1702         for (mfxI32 x = 0; x < size; x++)
1703         {
1704             lumaPlane[x] = ClipIntToChar(lumaPlane[x] + ((RefSamples[2 * size + 1 + x] - RefSamples[2 * size]) >> 1));
1705         }
1706         break;
1707     case ANG26_VER:
1708         for (mfxI32 y = 0; y < size; y++)
1709         {
1710             lumaPlane[y * size] = ClipIntToChar(lumaPlane[y * size] + ((RefSamples[2 * size - 1 - y] - RefSamples[2 * size]) >> 1));
1711         }
1712         break;
1713     default:
1714         return;
1715     }
1716 }
1717 
PutPatchIntoFrame(const PatchBlock & Patch,mfxFrameSurface1 & surf)1718 void  FrameProcessor::PutPatchIntoFrame(const PatchBlock & Patch, mfxFrameSurface1& surf) {
1719     //luma
1720     for (mfxU32 i = 0; i < Patch.m_BHeight; i++)
1721         memcpy(surf.Data.Y + (Patch.m_AdrY + i) * surf.Data.Pitch + Patch.m_AdrX, Patch.m_YPlane + i * Patch.m_BWidth, Patch.m_BWidth);
1722     //chroma U
1723     for (mfxU32 i = 0; i < Patch.m_BHeight / 2; ++i)
1724         memcpy(surf.Data.U + (Patch.m_AdrY / 2 + i) * surf.Data.Pitch / 2 + Patch.m_AdrX / 2, Patch.m_UPlane + i * (Patch.m_BWidth / 2), Patch.m_BWidth / 2);
1725     //chroma V
1726     for (mfxU32 i = 0; i < Patch.m_BHeight / 2; ++i)
1727         memcpy(surf.Data.V + (Patch.m_AdrY / 2 + i) * surf.Data.Pitch / 2 + Patch.m_AdrX / 2, Patch.m_VPlane + i * (Patch.m_BWidth / 2), Patch.m_BWidth / 2);
1728 }
1729 
GetIntraPredPlane(const BaseBlock & refBlock,INTRA_MODE currMode,const PatchBlock & frame,COLOR_COMPONENT colorComp,mfxU8 * currPlane)1730 void FrameProcessor::GetIntraPredPlane(const BaseBlock& refBlock, INTRA_MODE currMode, const PatchBlock& frame, COLOR_COMPONENT colorComp, mfxU8* currPlane)
1731 {
1732     if (currPlane == nullptr)
1733     {
1734         throw std::string("ERROR: GetIntraPredPlane: pointer to buffer is null\n");
1735     }
1736     //here refBlock parameters are measured in samples of corresponding colorComp
1737     //get reference samples for current TU
1738     std::vector<mfxU8> RefSamples;
1739     //size and coords of block in current color component
1740     mfxU32 cSize = (colorComp == LUMA_Y) ? refBlock.m_BHeight : (refBlock.m_BHeight / 2);
1741     mfxU32 cAdrX = (colorComp == LUMA_Y) ? refBlock.m_AdrX : (refBlock.m_AdrX / 2);
1742     mfxU32 cAdrY = (colorComp == LUMA_Y) ? refBlock.m_AdrY : (refBlock.m_AdrY / 2);
1743 
1744     FillIntraRefSamples(cSize, cAdrX, cAdrY, frame, colorComp, RefSamples);
1745 
1746     // get filter, write it into buffer and make it
1747     MakeFilter(RefSamples, cSize, currMode);
1748 
1749     //generate Prediction and return the output patch
1750     GenerateIntraPrediction(RefSamples, cSize, currMode, currPlane);
1751 
1752     if (colorComp == LUMA_Y && cSize < 32)
1753     {
1754         MakePostFilter(RefSamples, cSize, currMode, currPlane);
1755     }
1756 }
1757 
1758 
// Builds the intra-predicted patch for a TU.
//
// refBlock - TU to predict (position and size in luma samples, luma and chroma intra modes)
// frame    - patch providing the neighbouring samples
//
// Luma is always predicted. Chroma handling depends on the luma TB size:
//   - size != 4: both chroma planes are predicted for this TU and returned with luma
//   - size == 4: one chroma TB corresponds to four 4x4 luma TBs. Chroma is predicted only when
//     refBlock is the lower-right one of the four (see last paragraph before new chapter,
//     p. 65(76) in HEVC Algorithms and Architectures); an 8x8 patch covering all four
//     luma blocks is returned in that case. Otherwise the patch carries the luma prediction only.
PatchBlock FrameProcessor::GetIntraPatchBlock(const TUBlock& refBlock, const PatchBlock& frame)
{
    PatchBlock lumaPatch(refBlock);
    GetIntraPredPlane(refBlock, refBlock.m_IntraModeLuma, frame, LUMA_Y, lumaPatch.m_YPlane);

    const bool hasOwnChroma = (refBlock.m_BHeight != 4);
    if (hasOwnChroma)
    {
        // chroma TBs of size / 2
        GetIntraPredPlane(refBlock, refBlock.m_IntraModeChroma, frame, CHROMA_U, lumaPatch.m_UPlane);
        GetIntraPredPlane(refBlock, refBlock.m_IntraModeChroma, frame, CHROMA_V, lumaPatch.m_VPlane);
        return lumaPatch;
    }

    const bool isLowerRightOfQuad = (refBlock.m_AdrX % 8 == 4) && (refBlock.m_AdrY % 8 == 4);
    if (!isLowerRightOfQuad)
    {
        // one of the first three 4x4 blocks: luma prediction only
        return lumaPatch;
    }

    // 8x8 area taken from the frame: the three preceding 4x4 luma blocks are already there
    PatchBlock quadPatch(BaseBlock(refBlock.m_AdrX - 4, refBlock.m_AdrY - 4, 8, 8), frame);
    // the fourth luma block comes from the patch just predicted
    quadPatch.InsertAnotherPatch(lumaPatch);
    // chroma components of size 4x4 corresponding to the union of the four luma blocks
    GetIntraPredPlane(quadPatch, refBlock.m_IntraModeChroma, frame, CHROMA_U, quadPatch.m_UPlane);
    GetIntraPredPlane(quadPatch, refBlock.m_IntraModeChroma, frame, CHROMA_V, quadPatch.m_VPlane);
    return quadPatch;
}
1791 
MakeTUIntraPrediction(const TUBlock & refBlock,PatchBlock & targetPatch)1792 void FrameProcessor::MakeTUIntraPrediction(const TUBlock& refBlock, PatchBlock& targetPatch)
1793 {
1794     PatchBlock patch(refBlock);
1795     //now the most contrast mode is determined only for luma component, chroma mode is set equal to luma mode
1796     GetIntraPredPlane(refBlock, refBlock.m_IntraModeLuma, targetPatch, LUMA_Y, patch.m_YPlane);
1797     //write Patch into targetPatch
1798     targetPatch.InsertAnotherPatch(patch);
1799 }
1800 
ApplyTUIntraPrediction(const TUBlock & block,ExtendedSurface & surf)1801 void FrameProcessor::ApplyTUIntraPrediction(const TUBlock & block, ExtendedSurface& surf)
1802 {
1803     PatchBlock framePatchBlock(BaseBlock(0, 0, surf.Info.CropW, surf.Info.CropH), surf);
1804     PatchBlock patch = GetIntraPatchBlock(block, framePatchBlock);
1805     //write Patch into frame
1806     PutPatchIntoFrame(patch, surf);
1807 }
1808 
1809 //Iterates over CUs in CTU and applies intra prediction for intra CUs inside it
ApplyIntraPredInCTU(const CTUDescriptor & CTU,FrameChangeDescriptor & frame_descr)1810 void FrameProcessor::ApplyIntraPredInCTU(const CTUDescriptor & CTU, FrameChangeDescriptor & frame_descr)
1811 {
1812     for (auto& CU : CTU.m_CUVec)
1813     {
1814         if (CU.m_PredType == INTRA_PRED)
1815         {
1816             for (auto& TU : CU.m_TUVec)
1817             {
1818                 ApplyTUIntraPrediction(TU, *frame_descr.m_frame);
1819             }
1820         }
1821     }
1822 }
1823 
1824 #endif // MFX_VERSION
1825