1 // Copyright (c) 2018-2019 Intel Corporation
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in all
11 // copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 // SOFTWARE.
20
21 #include "mfxvideo.h"
22
23 #if MFX_VERSION >= MFX_VERSION_NEXT
24
25 #include "frame_processor.h"
26 #include "random_generator.h"
27
28
29 // Initialize parameters
Init(const InputParams & params)30 void FrameProcessor::Init(const InputParams ¶ms)
31 {
32 m_CropH = params.m_height;
33 m_CropW = params.m_width;
34
35 m_CTUStr = params.m_CTUStr;
36
37 // align by CUSize
38 m_Height = MSDK_ALIGN(m_CropH, m_CTUStr.CTUSize);
39 m_Width = MSDK_ALIGN(m_CropW, m_CTUStr.CTUSize);
40
41 m_HeightInCTU = m_Height / m_CTUStr.CTUSize;
42 m_WidthInCTU = m_Width / m_CTUStr.CTUSize;
43
44 m_ProcMode = params.m_ProcMode;
45
46 m_SubPelMode = params.m_SubPixelMode;
47
48 m_IsForceExtMVPBlockSize = params.m_bIsForceExtMVPBlockSize;
49 m_ForcedExtMVPBlockSize = params.m_ForcedExtMVPBlockSize;
50 m_GenMVPBlockSize = SetCorrectMVPBlockSize(params.m_GenMVPBlockSize);
51 }
52
53 // Beginning of processing of current frame. Only MOD frames are processed
54
ProcessFrame(FrameChangeDescriptor & frame_descr)55 void FrameProcessor::ProcessFrame(FrameChangeDescriptor & frame_descr)
56 {
57 try
58 {
59 switch (frame_descr.m_changeType)
60 {
61 case GEN:
62 return;
63 break;
64
65 case MOD:
66 GenCTUParams(frame_descr);
67 GenAndApplyPrediction(frame_descr);
68 UnlockSurfaces(frame_descr);
69 break;
70
71 case SKIP:
72 UnlockSurfaces(frame_descr);
73 break;
74
75 default:
76 return;
77 break;
78 }
79 }
80 catch (std::string & e) {
81 std::cout << e << std::endl;
82 throw std::string("ERROR: FrameProcessor::ProcessFrame");
83 }
84 return;
85 }
86
IsSampleAvailable(mfxU32 AdrX,mfxU32 AdrY)87 bool FrameProcessor::IsSampleAvailable(mfxU32 AdrX, mfxU32 AdrY)
88 {
89 return (AdrY < m_CropH && AdrX < m_CropW);
90 }
91
92 // Only I420 color format are supported
GetSampleI420(COLOR_COMPONENT comp,mfxU32 AdrX,mfxU32 AdrY,mfxFrameSurface1 * surf)93 mfxU8 FrameProcessor::GetSampleI420(COLOR_COMPONENT comp, mfxU32 AdrX, mfxU32 AdrY, mfxFrameSurface1* surf)
94 {
95 if (surf == nullptr)
96 {
97 throw std::string("ERROR: GetSampleI420: null pointer reference");
98 }
99
100 switch (comp)
101 {
102 case LUMA_Y:
103 return surf->Data.Y[AdrY * surf->Data.Pitch + AdrX];
104 case CHROMA_U:
105 return surf->Data.U[(AdrY / 2) * (surf->Data.Pitch / 2) + (AdrX / 2)];
106 case CHROMA_V:
107 return surf->Data.V[(AdrY / 2) * (surf->Data.Pitch / 2) + (AdrX / 2)];
108 default:
109 throw std::string("ERROR: Trying to get unspecified component");
110 }
111 }
112
GetClippedSample(COLOR_COMPONENT comp,mfxI32 X,mfxI32 Y,mfxFrameSurface1 * surf)113 mfxI32 FrameProcessor::GetClippedSample(COLOR_COMPONENT comp, mfxI32 X, mfxI32 Y, mfxFrameSurface1 * surf)
114 {
115 mfxU32 clippedX = Clip3(X, 0, (mfxI32)m_CropW);
116 mfxU32 clippedY = Clip3(Y, 0, (mfxI32)m_CropH);
117
118 return GetSampleI420(comp, clippedX, clippedY, surf);
119 }
120
121 // These functions are to be used after applying CalculateLumaPredictionSamplePreWP
122 // In specification default weighted prediction is the final scaling step for sample prediction.
123 // predSampleL0 is an interpolated Luma sample value which is calculated in CalculateLumaPredictionSamplePreWP()
124 // Output is final scaled and rounded Luma value which returns in CalculateLumaPredictionSamplePreWP()
125 // See 8.5.3.3.4.2 "Default weighted sample prediction process" from 4.0 ITU-T H.265 (V4) 2016-12-22
GetDefaultWeightedPredSample(mfxI32 predSampleLx)126 mfxU8 FrameProcessor::GetDefaultWeightedPredSample(mfxI32 predSampleLx)
127 {
128 // These shift and offset variables are defined in H265 standard for 8 bit color depth
129 constexpr mfxU16 shift1 = 6;
130 constexpr mfxU16 offset1 = 1 << (shift1 - 1); // 2^5 = 32
131
132 constexpr mfxI32 upperBound = (1 << 8) - 1; // 2^8 - 1 = 255
133 mfxI32 predSample = (predSampleLx + offset1) >> shift1;
134
135 return mfxU8(Clip3(predSample, 0, upperBound));
136 }
137
138 // Alternative in case working with B-frames
GetDefaultWeightedPredSample(mfxI32 predSampleL0,mfxI32 predSampleL1)139 mfxU8 FrameProcessor::GetDefaultWeightedPredSample(mfxI32 predSampleL0, mfxI32 predSampleL1)
140 {
141 // These shift and offset variables are defined in H265 standard for 8 bit color depth
142 mfxU16 shift2 = 7;
143 mfxU16 offset2 = 1 << (shift2 - 1); // 2^6 = 64
144
145 mfxI32 upperBound = (1 << 8) - 1; // 2^8 - 1 = 255
146
147 mfxI32 predSample = (predSampleL0 + predSampleL1 + offset2) >> shift2;
148
149 return mfxU8(Clip3(predSample, 0, upperBound));
150 }
151
GenRandomQuadTreeStructure(QuadTree & QT,mfxU8 minDepth,mfxU8 maxDepth)152 void FrameProcessor::GenRandomQuadTreeStructure(QuadTree& QT, mfxU8 minDepth, mfxU8 maxDepth)
153 {
154 if (!QT.IsEmpty())
155 {
156 QT.Clear();
157 }
158
159 GenRandomQuadTreeSubstrRecur(QT.root, minDepth, maxDepth);
160 }
161
GenRandomQuadTreeSubstrRecur(QuadTreeNode & node,mfxU8 minDepth,mfxU8 maxDepth)162 void FrameProcessor::GenRandomQuadTreeSubstrRecur(QuadTreeNode& node, mfxU8 minDepth, mfxU8 maxDepth)
163 {
164 if (node.m_Level < minDepth ||
165 (node.m_Level < maxDepth && GetRandomGen().GetRandomBit()))
166 {
167 node.MakeChildren();
168 for (auto& child : node.m_Children)
169 {
170 GenRandomQuadTreeSubstrRecur(child, minDepth, maxDepth);
171 }
172 }
173 return;
174 }
175
176 //Make a quad-tree structure inside ctu's QuadTree from FEI output
GenQuadTreeInCTUWithBitMask(CTUDescriptor & CTU,mfxU32 bitMask)177 void FrameProcessor::GenQuadTreeInCTUWithBitMask(CTUDescriptor& CTU, mfxU32 bitMask)
178 {
179 if (bitMask & 1)
180 {
181 CTU.m_CUQuadTree.root.MakeChildren();
182 GenQuadTreeWithBitMaskRecur(CTU.m_CUQuadTree.root, bitMask >> 1);
183 }
184
185 return;
186 }
187
GenQuadTreeWithBitMaskRecur(QuadTreeNode & node,mfxU32 bitMask)188 void FrameProcessor::GenQuadTreeWithBitMaskRecur(QuadTreeNode& node, mfxU32 bitMask)
189 {
190 for (mfxU32 i = 0; i < 4; i++)
191 {
192 if (bitMask & (1 << i))
193 {
194 node.m_Children[i].MakeChildren();
195
196 if (node.m_Children[i].m_Level < 2)
197 {
198 GenQuadTreeWithBitMaskRecur(node.m_Children[i], bitMask >> 4 * (i + 1));
199 }
200 }
201 }
202
203 return;
204 }
205
206 //Fills CU vector inside CTU with correct CU blocks in CTU quad-tree z-scan order
207 //For each CU, depending on the test type, its prediction mode is selected
208 //and further CU partitioning/mode selection is made (i.e. PU and TU partitioning
209 //and per TU intra mode selection)
GenCUVecInCTU(CTUDescriptor & ctu,mfxU16 testType)210 void FrameProcessor::GenCUVecInCTU(CTUDescriptor& ctu, mfxU16 testType)
211 {
212 QuadTree& QT = ctu.m_CUQuadTree;
213 std::vector<BaseBlock> tmpVec;
214 QT.GetQuadTreeBlocksRecur(QT.root, ctu.m_AdrX, ctu.m_AdrY, ctu.m_BHeight, tmpVec);
215
216 for (auto& block : tmpVec)
217 {
218 CUBlock cu_block(block);
219
220 if (testType & GENERATE_INTER && testType & GENERATE_INTRA)
221 {
222 cu_block.m_PredType = (GetRandomGen().GetRandomBit()) ?
223 INTRA_PRED : INTER_PRED;
224 }
225 else if (testType & GENERATE_INTER)
226 {
227 cu_block.m_PredType = INTER_PRED;
228 }
229 else
230 {
231 cu_block.m_PredType = INTRA_PRED;
232 }
233
234 if (cu_block.m_PredType == INTRA_PRED)
235 {
236 MakeIntraCU(cu_block);
237 }
238 else if (cu_block.m_PredType == INTER_PRED)
239 {
240 MakeInterCU(cu_block, testType);
241 }
242
243 ctu.m_CUVec.push_back(cu_block);
244 }
245 }
246
GetRefSampleAvailFlagsForTUsInCTU(CTUDescriptor & CTU)247 void FrameProcessor::GetRefSampleAvailFlagsForTUsInCTU(CTUDescriptor & CTU)
248 {
249 QuadTree& QT = CTU.m_CUQuadTree;
250 std::vector<RefSampleAvail> CURefSampleAvailVec;
251 //CTU is set with RefSamples available in both directions here.
252 //this should be made by guaranteeing that CTUs are far from each other
253 //(at least one initial CTU between two substituted CTUs in both directions)
254 QT.GetQuadTreeRefSampleAvailVector(QT.root, CTU, CTU, CURefSampleAvailVec);
255
256 if (CURefSampleAvailVec.size() != CTU.m_CUVec.size())
257 {
258 throw std::string("ERROR: GetRefSampleAvailFlagsForTUsInCTU: mismatching CU and RefSampleAvail vector sizes");
259 }
260
261 for (mfxU32 i = 0; i < CTU.m_CUVec.size(); i++)
262 {
263 CUBlock & CU = CTU.m_CUVec[i];
264 if (CU.m_PredType == INTRA_PRED)
265 {
266 QuadTree& RQT = CU.m_TUQuadTree;
267 std::vector<RefSampleAvail> TURefSampleAvailVec;
268
269 RQT.GetQuadTreeRefSampleAvailVectorRecur(RQT.root, CU, CURefSampleAvailVec[i], CTU, TURefSampleAvailVec);
270 for (mfxU32 j = 0; j < CU.m_TUVec.size(); j++)
271 {
272 TUBlock& TU = CU.m_TUVec[j];
273 TU.m_RefSampleAvail = TURefSampleAvailVec[j];
274 }
275 }
276 }
277 }
278
GenRandomTUQuadTreeInCU(CUBlock & cu_block)279 void FrameProcessor::GenRandomTUQuadTreeInCU(CUBlock& cu_block)
280 {
281 QuadTree& quadTreeTU = cu_block.m_TUQuadTree;
282 mfxU32 minTUDepth = std::max(0, mfxI32(CeilLog2(cu_block.m_BHeight) - m_CTUStr.maxLog2TUSize));
283 mfxU32 maxTUDepth = std::min(CeilLog2(cu_block.m_BHeight) - m_CTUStr.minLog2TUSize, m_CTUStr.maxTUQTDepth);
284
285 GenRandomQuadTreeStructure(quadTreeTU, minTUDepth, maxTUDepth);
286 }
287
288
289 //Make a quad-tree structure inside ctu's QuadTree so that all CU blocks inside CTU
290 //have a size smaller than specified by maxLog2CUSize and larger than specified by minLog2CUSize
GenRandomCUQuadTreeInCTU(CTUDescriptor & ctu)291 void FrameProcessor::GenRandomCUQuadTreeInCTU(CTUDescriptor& ctu)
292 {
293 QuadTree& quadTreeCU = ctu.m_CUQuadTree;
294 mfxU32 minCUDepth = std::max(0, mfxI32(CeilLog2(ctu.m_BHeight) - m_CTUStr.maxLog2CUSize));
295 mfxU32 maxCUDepth = CeilLog2(ctu.m_BHeight) - m_CTUStr.minLog2CUSize;
296 GenRandomQuadTreeStructure(quadTreeCU, minCUDepth, maxCUDepth);
297 }
298
299 //TODO:
IsBlockUniform(const BaseBlock & block,PatchBlock & frame)300 bool FrameProcessor::IsBlockUniform(const BaseBlock& block, PatchBlock& frame)
301 {
302 return false;
303 }
304
305 //TODO:
AlterBorderSamples(const BaseBlock & block,PatchBlock & frame)306 void FrameProcessor::AlterBorderSamples(const BaseBlock& block, PatchBlock& frame)
307 {
308
309 }
310
ChooseContrastIntraMode(const BaseBlock & block,std::vector<TUBlock> & block_vec,PatchBlock & frame)311 void FrameProcessor::ChooseContrastIntraMode(const BaseBlock& block, std::vector<TUBlock>& block_vec, PatchBlock& frame)
312 {
313 if (IsBlockUniform(block, frame))
314 {
315 AlterBorderSamples(block, frame);
316 }
317
318 //distance between initial block and intra predicted patch should be maximized
319 mfxU32 maxDist = 0; //max num of diff that can be reached is 32x32x256 = 2^18
320 //initial block filled with samples from the frame
321 PatchBlock refBlock(block, frame);
322 //here Patch with max distance from the initBlock will be stored
323 PatchBlock maxDistPatch(block);
324 //mode corresponding to maxDistPatch
325 INTRA_MODE maxDistMode = PLANAR;
326
327 //choose TUs inside block
328 std::vector<TUBlock> TUInCurrBlock;
329 for (auto& TU : block_vec)
330 {
331 if (TU.IsInBlock(block))
332 {
333 TUInCurrBlock.emplace_back(TU);
334 }
335 }
336
337 //check whether there is TU inside block with no left-down or up-right refSamples available
338 //if so, we can't use corresponding modes and we limit maxModeAvail minModeAvail
339 // and minAngModeAvail in appropriate manner
340 //if left-down samples aren't available, modes 2 - 9 are prohibited
341 //if up-right samples aren't available, modes 27 - 34 are prohibited
342 //in both cases planar mode is prohibited because it uses samples p[-1][N] and p[N][-1]
343 //in coordinates relative to the block, where N is the size of block
344 //see HEVC algorighms and structures page 101(112)
345 //Vivienne Sze Madhukar Budagavi Gary J.Sullivan High Efficiency Video Coding(HEVC) Algorithms and Architectures 2014
346
347 mfxU32 minModeAvail = INTRA_MODE::PLANAR;
348 mfxU32 minAngModeAvail = INTRA_MODE::ANG2;
349 mfxU32 maxModeAvail = INTRA_MODE::ANG34;
350 for (auto& TU : TUInCurrBlock)
351 {
352 if (!TU.m_RefSampleAvail.LeftDown)
353 {
354 minAngModeAvail = INTRA_MODE::ANG10_HOR;
355 minModeAvail = INTRA_MODE::DC;
356 }
357 if (!TU.m_RefSampleAvail.UpRight)
358 {
359 maxModeAvail = INTRA_MODE::ANG26_VER;
360 minModeAvail = INTRA_MODE::DC;
361 }
362 }
363
364 for (mfxU32 i = minModeAvail; i <= maxModeAvail; i++)
365 {
366 if (i < INTRA_MODE::ANG2 || i >= minAngModeAvail)
367 {
368 //iterate over TUs of current CU and save predicted TUs into frame
369 for (auto& TU : TUInCurrBlock)
370 {
371 TU.m_IntraModeChroma = TU.m_IntraModeLuma = INTRA_MODE(i);
372 MakeTUIntraPrediction(TU, frame);
373 }
374 //get curPatch from frame
375 PatchBlock curPatch(block, frame);
376 //count distance between initial block and current patch
377 mfxU32 curDist = refBlock.CalcYSAD(curPatch);
378 //save patch which is the farthest from the initial CU block
379 if (curDist > maxDist)
380 {
381 maxDist = curDist;
382 maxDistPatch = curPatch;
383 maxDistMode = INTRA_MODE(i);
384 }
385 }
386 }
387
388 for (auto& TU : block_vec)
389 {
390 if (TU.IsInBlock(block))
391 {
392 //for now chroma intra mode is set equal to luma intra mode
393 TU.m_IntraModeChroma = TU.m_IntraModeLuma = maxDistMode;
394 }
395 }
396
397 frame.InsertAnotherPatch(maxDistPatch);
398
399 return;
400 }
401
402 //Generates a quad-tree TU structure inside the cu_block and fills the TU block vector
403 //inside cu_block with correct TU blocks corresponding to the CU quad-tree TU structure
404 //Selects an intra mode for each generated TU
MakeIntraCU(CUBlock & cu_block)405 void FrameProcessor::MakeIntraCU(CUBlock& cu_block)
406 {
407 QuadTree& QT = cu_block.m_TUQuadTree;
408 GenRandomTUQuadTreeInCU(cu_block);
409
410 std::vector<BaseBlock> tmpVec;
411 QT.GetQuadTreeBlocksRecur(QT.root, cu_block.m_AdrX, cu_block.m_AdrY, cu_block.m_BHeight, tmpVec);
412 cu_block.m_TUVec.clear();
413 for (auto& block : tmpVec)
414 {
415 cu_block.m_TUVec.emplace_back(TUBlock(block, PLANAR, PLANAR));
416 }
417
418 mfxU32 minCUSize = 1 << m_CTUStr.minLog2CUSize;
419 //Special case for CUs of size equal to minCUSize:
420 //we can choose intra mode for every quarter
421 //see last paragraph on p.228(238) in HEVC Algorithms and Architectures
422 if (cu_block.m_BHeight == minCUSize && cu_block.m_BWidth == minCUSize
423 && tmpVec.size() >= 4 && GetRandomGen().GetRandomBit())
424 {
425 cu_block.m_IntraPartMode = INTRA_NxN;
426 }
427 else
428 {
429 cu_block.m_IntraPartMode = INTRA_2Nx2N;
430 }
431 }
432
MakeIntraPredInCTU(CTUDescriptor & ctu,FrameChangeDescriptor & descr)433 void FrameProcessor::MakeIntraPredInCTU(CTUDescriptor& ctu, FrameChangeDescriptor & descr)
434 {
435 ExtendedSurface& surf = *descr.m_frame;
436 //save frame data in temporary patchBlock
437 PatchBlock framePatchBlock(BaseBlock(0, 0, surf.Info.CropW, surf.Info.CropH), surf);
438 for (auto& cu : ctu.m_CUVec)
439 {
440 if (cu.m_PredType == INTRA_PRED)
441 {
442 if (cu.m_IntraPartMode == INTRA_NxN)
443 {
444 std::vector<BaseBlock> childrenBlocks;
445 cu.GetChildBlock(childrenBlocks);
446 for (mfxU32 i = 0; i < 4; i++)
447 {
448 //choose the most contrast intra mode for every quarter of CU
449 ChooseContrastIntraMode(childrenBlocks[i], cu.m_TUVec, framePatchBlock);
450 }
451 }
452 else
453 {
454 ChooseContrastIntraMode(cu, cu.m_TUVec, framePatchBlock);
455 }
456 }
457 }
458 }
459
460 //Chooses the inter partitioning mode for the CU and fills the PU vector inside it with PUs
461 //corresponding to the chosen mode
MakeInterCU(CUBlock & cu_block,mfxU16 testType)462 void FrameProcessor::MakeInterCU(CUBlock& cu_block, mfxU16 testType)
463 {
464 INTER_PART_MODE mode = INTER_PART_MODE::INTER_NONE;
465
466 mfxU32 max_mode_num = -1;
467
468 if (!(testType & GENERATE_SPLIT) || !m_CTUStr.bCUToPUSplit)
469 {
470 mode = INTER_PART_MODE::INTER_2Nx2N; //If no split is specified or CU to PU split is forbidden,
471 //the CU will contain a single PU
472 }
473 else
474 {
475 if (cu_block.m_BHeight == 8 && cu_block.m_BWidth == 8)
476 {
477 //Minimum PU size is 4x8 or 8x4, which means that only first 3 inter partitioning
478 //modes are available for 8x8 CU
479 //Condition when only symmetric modes are supported in case of 8x8 CU is satisfied by default
480 max_mode_num = INTER_8x8CU_PART_MODE_NUM - 1;
481 }
482 else if (m_CTUStr.bForceSymmetricPU)
483 {
484 //No check for 8x8 CU case here. It is already processed
485 max_mode_num = INTER_SYMM_PART_MODE_NUM - 1;
486 }
487 else
488 {
489 max_mode_num = INTER_PART_MODE_NUM - 1;
490 }
491
492 bool isCUMinSized = (CeilLog2(cu_block.m_BHeight) == m_CTUStr.minLog2CUSize);
493 // CU split into 4 square PUs is only allowed for the CUs
494 // of the smallest size (see p.61-62 of doi:10.1007/978-3-319-06895-4)
495 do
496 {
497 mode = (INTER_PART_MODE) GetRandomGen().GetRandomNumber(0, max_mode_num);
498 }
499 while (!isCUMinSized && mode == INTER_PART_MODE::INTER_NxN);
500
501 }
502
503 cu_block.BuildPUsVector(mode);
504 cu_block.m_InterPartMode = mode;
505 }
506
CeilLog2(mfxU32 size)507 mfxU8 FrameProcessor::CeilLog2(mfxU32 size)
508 {
509 mfxU8 ret = 0;
510 while (size > 1)
511 {
512 size /= 2;
513 ret++;
514 }
515 return ret;
516 }
517
518 // First round of generation: choose CTU on current MOD frame
519 //
520 // FrameChangeDescriptor & frameDescr - descriptor of current MOD frame
521
GenCTUParams(FrameChangeDescriptor & frame_descr)522 void FrameProcessor::GenCTUParams(FrameChangeDescriptor & frame_descr)
523 {
524 mfxI32 maxAttempt = 100;
525
526 // Try no more than maxAttempt times to generate no more than m_CTUStr.CTUMaxNum CTUs
527 for (mfxI32 i = 0; i < maxAttempt && frame_descr.m_vCTUdescr.size() < m_CTUStr.CTUMaxNum; ++i)
528 {
529 // TODO: this part could be improved with aggregate initialization
530 CTUDescriptor tempCTUDsc;
531
532 // Do not choose last CTU in row/column to avoid effects of alignment
533 // m_WidthInCTU-1 is a coordinate of last CTU in row
534 tempCTUDsc.m_AdrXInCTU = GetRandomGen().GetRandomNumber(0, m_WidthInCTU - 2);
535 tempCTUDsc.m_AdrYInCTU = GetRandomGen().GetRandomNumber(0, m_HeightInCTU - 2);
536
537 // Calculate pixel coordinates and size
538 tempCTUDsc.m_AdrX = tempCTUDsc.m_AdrXInCTU * m_CTUStr.CTUSize;
539 tempCTUDsc.m_AdrY = tempCTUDsc.m_AdrYInCTU * m_CTUStr.CTUSize;
540 tempCTUDsc.m_BWidth = tempCTUDsc.m_BHeight = m_CTUStr.CTUSize;
541
542 // Checks if current CTU intersects with or is too close to any of already generated
543 auto it = find_if(frame_descr.m_vCTUdescr.begin(), frame_descr.m_vCTUdescr.end(),
544 [&](const CTUDescriptor& dscr){ return dscr.CheckForIntersect(tempCTUDsc, m_CTUStr.CTUSize * m_CTUStr.CTUDist, m_CTUStr.CTUSize * m_CTUStr.CTUDist); });
545
546 if (it == frame_descr.m_vCTUdescr.end())
547 {
548 // If no intersection, put generated block to vector
549 frame_descr.m_vCTUdescr.push_back(std::move(tempCTUDsc));
550 }
551 }
552
553 return;
554 }
555
556 // Second round of test: Generate partitions and MVs. Write down pixels to MOD and all reference GEN frames
557 // Remove a CTU generated in the previous round from the test set
558 // if it is impossible to place it without intersecting other CTUs
559 //
560 // FrameChangeDescriptor & frameDescr - descriptor of current MOD frame
561
GenAndApplyPrediction(FrameChangeDescriptor & frameDescr)562 void FrameProcessor::GenAndApplyPrediction(FrameChangeDescriptor & frameDescr)
563 {
564 // Iterate over all generated in GenCTUParams CTUs (see round 1)
565 auto it_ctu = frameDescr.m_vCTUdescr.begin();
566
567 if (!(frameDescr.m_testType & GENERATE_SPLIT))
568 {
569 //If no split is specified, set min/max CU size to CTU size
570 //so that the CTU quad-tree only contains single node after generation
571 m_CTUStr.minLog2CUSize = CeilLog2(m_CTUStr.CTUSize);
572 m_CTUStr.maxLog2CUSize = CeilLog2(m_CTUStr.CTUSize);
573 }
574
575
576 while (it_ctu != frameDescr.m_vCTUdescr.end())
577 {
578 auto &CTU = *it_ctu;
579
580 //make a tree and save CUs into the vector inside CTU
581 GenRandomCUQuadTreeInCTU(CTU);
582 GenCUVecInCTU(CTU, frameDescr.m_testType);
583 if (frameDescr.m_testType & GENERATE_INTRA)
584 {
585 GetRefSampleAvailFlagsForTUsInCTU(CTU);
586 }
587
588 FrameOccRefBlockRecord bak = frameDescr.BackupOccupiedRefBlocks();
589
590 bool bMVGenSuccess = true;
591 if (frameDescr.m_testType & GENERATE_INTER)
592 {
593 bMVGenSuccess = MakeInterPredInCTU(CTU, frameDescr);
594 }
595 if (bMVGenSuccess)
596 {
597 //Inter prediction must be applied first
598 //because intra blocks should use noise pixels from
599 //adjacent inter CUs and not unchanged picture pixels
600 //in the same spot
601 ApplyInterPredInCTU(CTU, frameDescr);
602
603 //most contrast intra mode is chosen here
604 MakeIntraPredInCTU(CTU, frameDescr);
605 ApplyIntraPredInCTU(CTU, frameDescr);
606 it_ctu++;
607 }
608 else
609 {
610 //Unable to put the CTU into reference frames without intersection
611 //Restore backup reference block info:
612 frameDescr.RestoreOccupiedRefBlocks(bak);
613
614 //Remove current CTU from the test block list and updating the iterator
615 it_ctu = frameDescr.m_vCTUdescr.erase(it_ctu);
616 }
617 }
618
619 return;
620 }
621
622 //Generates MV and MVP for all PUs in CTU
MakeInterPredInCTU(CTUDescriptor & CTU,FrameChangeDescriptor & frameDescr)623 bool FrameProcessor::MakeInterPredInCTU(CTUDescriptor& CTU, FrameChangeDescriptor& frameDescr)
624 {
625 // First, need to construct an MVP grid and vector pools according to CTU partioning
626 MVMVPProcessor mvmvpProcessor(m_GenMVPBlockSize, m_SubPelMode);
627
628 if (frameDescr.m_testType & GENERATE_PREDICTION)
629 {
630 mvmvpProcessor.InitMVPGridData(CTU, frameDescr);
631 }
632
633 for (auto& CU : CTU.m_CUVec)
634 {
635 if (CU.m_PredType == INTER_PRED)
636 {
637 for (auto& PU : CU.m_PUVec)
638 {
639 // Set prediction flags for each PU
640 GenPredFlagsForPU(PU, frameDescr.m_frameType);
641
642 bool bMVGenSuccess = false;
643 if (frameDescr.m_testType & GENERATE_PREDICTION)
644 {
645 bMVGenSuccess = mvmvpProcessor.GenValidMVMVPForPU(PU, frameDescr);
646 }
647 else
648 {
649 bMVGenSuccess = mvmvpProcessor.GenValidMVForPU(PU, frameDescr);
650 }
651
652 if (bMVGenSuccess)
653 {
654 // Store the shifted PU as a BaseBlock in the corresponding reference frame descriptor
655 if (PU.predFlagL0)
656 {
657 auto itL0 = std::next(frameDescr.m_refDescrList0.begin(), PU.m_MV.RefIdx.RefL0);
658 itL0->m_OccupiedRefBlocks.emplace_back(PU.GetShiftedBaseBlock(L0));
659 }
660
661 if (PU.predFlagL1)
662 {
663 auto itL1 = std::next(frameDescr.m_refDescrList1.begin(), PU.m_MV.RefIdx.RefL1);
664 itL1->m_OccupiedRefBlocks.emplace_back(PU.GetShiftedBaseBlock(L1));
665 }
666 }
667 else
668 {
669 //MV prediction has failed; need to discard the whole CTU
670 return false;
671 }
672 }
673 }
674 }
675
676 //MVs for all PUs in CTU have been generated
677
678 // If predictors required, put them to ext buffer
679 if (frameDescr.m_testType & GENERATE_PREDICTION && frameDescr.m_procMode == GENERATE)
680 {
681 //Only output predictors if CTU contains at least one inter CU
682 auto it = std::find_if(CTU.m_CUVec.begin(), CTU.m_CUVec.end(),
683 [](const CUBlock& CU) { return CU.m_PredType == INTER_PRED; });
684
685 if (it != CTU.m_CUVec.end())
686 {
687 mvmvpProcessor.FillFrameMVPExtBuffer(frameDescr);
688 }
689 }
690
691 if ((frameDescr.m_testType & GENERATE_PREDICTION) && frameDescr.m_procMode == VERIFY)
692 {
693 mvmvpProcessor.GetMVPPools(CTU.m_MVPGenPools);
694 }
695
696 return true;
697 }
698
699
GenPredFlagsForPU(PUBlock & PU,mfxU16 frameType)700 void FrameProcessor::GenPredFlagsForPU(PUBlock & PU, mfxU16 frameType)
701 {
702 PU.predFlagL0 = true; // Set prediction flag for P-frames
703 if (frameType & MFX_FRAMETYPE_B)
704 {
705 // Each PU requires 1 or more reference
706 mfxI32 maxValue = 2;
707 if (PU.m_BWidth == 4 || PU.m_BHeight == 4)
708 maxValue = 1; // For PUs 8x4 or 4x8 we can use only unidirectional prediction
709
710 switch (GetRandomGen().GetRandomNumber(0, maxValue))
711 {
712 case 0:
713 PU.predFlagL0 = true;
714 PU.predFlagL1 = false;
715 break;
716 case 1:
717 PU.predFlagL0 = false;
718 PU.predFlagL1 = true;
719 break;
720 case 2:
721 PU.predFlagL0 = true;
722 PU.predFlagL1 = true;
723 break;
724 }
725 }
726
727 return;
728 }
729
730 //Iterates over CUs in CTU and applies inter prediction for inter CUs
ApplyInterPredInCTU(CTUDescriptor & CTU,FrameChangeDescriptor & frameDescr)731 void FrameProcessor::ApplyInterPredInCTU(CTUDescriptor& CTU, FrameChangeDescriptor & frameDescr)
732 {
733 for (auto& CU : CTU.m_CUVec)
734 {
735 if (CU.m_PredType == INTER_PRED)
736 {
737 for (auto& PU : CU.m_PUVec)
738 {
739 // Advance to reference descriptor of desired frame
740 // NB: using reverse iterator here because RefIdx starts counting from the end of the list
741
742 // Generate noisy PU in GEN frames pixels
743 // This function should be called in both (generate and verify) modes, because includes work with random generator
744 PutNoiseBlocksIntoFrames(PU, frameDescr);
745
746 // Trace back those pixels (using q-pel interpolation if required) and put to current MOD frame
747 // This function should be called only in the generate mode
748 if (m_ProcMode == GENERATE)
749 {
750 TraceBackAndPutBlockIntoFrame(PU, frameDescr);
751 }
752 }
753 }
754 }
755 }
756
757 // Generate and put noisy pixels to surface surf, which corresponds to block BP coordinates
758
759 // Fill the block with 4x4 noise blocks
PutNoiseBlocksIntoFrames(const PUBlock & PU,const FrameChangeDescriptor & frameDescr,mfxU32 num_coeff,mfxU32 level)760 void FrameProcessor::PutNoiseBlocksIntoFrames(const PUBlock & PU, const FrameChangeDescriptor & frameDescr, mfxU32 num_coeff, mfxU32 level)
761 {
762 BaseBlock BPL0, BPL1;
763
764 mfxFrameSurface1* refSurfL0 = nullptr;
765 mfxFrameSurface1* refSurfL1 = nullptr;
766
767 if (PU.predFlagL0)
768 {
769 if (frameDescr.m_refDescrList0.size() <= PU.m_MV.RefIdx.RefL0)
770 {
771 throw std::string("ERROR: PutNoiseBlockIntoFrames: incorrect reference index for list 0");
772 }
773 auto &refDescrL0 = *next(frameDescr.m_refDescrList0.begin(), PU.m_MV.RefIdx.RefL0);
774
775 // Get frame of reference GEN frame
776 refSurfL0 = refDescrL0.m_frame;
777 if (refSurfL0 == nullptr)
778 {
779 throw std::string("ERROR: PutNoiseBlockIntoFrames: null pointer reference");
780 }
781
782 BPL0 = PU.GetShiftedBaseBlock(L0);
783 }
784
785 if (PU.predFlagL1)
786 {
787 if (frameDescr.m_refDescrList1.size() <= PU.m_MV.RefIdx.RefL1)
788 {
789 throw std::string("ERROR: PutNoiseBlockIntoFrames: incorrect reference index for list 1");
790 }
791 auto &refDescrL1 = *next(frameDescr.m_refDescrList1.begin(), PU.m_MV.RefIdx.RefL1);
792
793 // Get frame of reference GEN frame
794 refSurfL1 = refDescrL1.m_frame;
795 if (refSurfL1 == nullptr)
796 {
797 throw std::string("ERROR: PutNoiseBlockIntoFrames: null pointer reference");
798 }
799
800 BPL1 = PU.GetShiftedBaseBlock(L1);
801 }
802
803 //Check that PU can be subdivided evenly into 4x4 blocks
804 if (PU.m_BWidth % 4 || PU.m_BHeight % 4)
805 {
806 throw std::string("ERROR: PutNoiseBlockIntoFrame: invalid block size");
807 }
808 mfxU32 Block4x4NumX = PU.m_AdrX / 4;
809 mfxU32 Block4x4NumY = PU.m_AdrY / 4;
810 mfxU32 FrameWidthIn4x4Blocks = (m_Width + 3) / 4;
811 mfxU32 FrameHeightIn4x4Blocks = (m_Height + 3) / 4;
812
813 mfxU32 seedOff = frameDescr.m_frame->Data.FrameOrder * FrameWidthIn4x4Blocks * FrameHeightIn4x4Blocks;
814 mfxU32 seed = 0;
815
816 mfxU32 BPWidthIn4x4Blocks = PU.m_BWidth / 4;
817 mfxU32 BPHeightIn4x4Blocks = PU.m_BHeight / 4;
818 mfxU8 block[16] = {}, blockAdjusted[16] = {};
819 mfxI8 blockDeltaL0[16] = {}, blockDeltaL1[16] = {};
820
821 for (mfxU32 i = 0; i < BPHeightIn4x4Blocks; i++)
822 {
823 for (mfxU32 j = 0; j < BPWidthIn4x4Blocks; j++)
824 {
825 //Calculate seed from the 4x4 block position inside the PU
826 seed = seedOff + FrameWidthIn4x4Blocks * (Block4x4NumY + i) + Block4x4NumX + j;
827 GetRandomGen().SeedGenerator(seed);
828
829 //Calculate noise pixel values
830 FillInBlock4x4(num_coeff, level, block);
831
832 //Calculate delta for difference between L0 and L1 references
833 if (PU.predFlagL0 && PU.predFlagL1)
834 FillDeltaBlocks4x4(blockDeltaL0, blockDeltaL1);
835
836 if (m_ProcMode == GENERATE)
837 {
838 if (PU.predFlagL0)
839 {
840 ApplyDeltaPerPixel(PU, blockDeltaL0, block, blockAdjusted);
841 PutBlock4x4(BPL0.m_AdrX + j * 4, BPL0.m_AdrY + i * 4, blockAdjusted, refSurfL0);
842 }
843
844 if (PU.predFlagL1)
845 {
846 ApplyDeltaPerPixel(PU, blockDeltaL1, block, blockAdjusted);
847 PutBlock4x4(BPL1.m_AdrX + j * 4, BPL1.m_AdrY + i * 4, blockAdjusted, refSurfL1);
848 }
849 }
850 }
851 }
852 }
853
854 // Fill block[16] with noise pixels, using up to num_coeff first random DCT coefficients
855 // in the range of (-level; +level)
FillInBlock4x4(mfxU32 num_coeff,mfxU32 level,mfxU8 block[16])856 void FrameProcessor::FillInBlock4x4(mfxU32 num_coeff, mfxU32 level, mfxU8 block[16])
857 {
858 if (block == nullptr)
859 {
860 throw std::string("ERROR: FillInBlock4x4: null pointer reference");
861 }
862
863 if (num_coeff < 1 || num_coeff > 16)
864 {
865 throw std::string("\nERROR: Wrong num_coeff in FrameProcessor::FillInBlock4x4");
866 }
867 if (level > 255)
868 {
869 throw std::string("\nERROR: Wrong level in FrameProcessor::FillInBlock4x4");
870 }
871
872 mfxI32 coeff[16];
873 mfxI32 pixels[16];
874 mfxU32 scan[16] = { 0,1,4,8,5,2,3,6,9,12,13,10,7,11,14,15 };
875 memset(coeff, 0, sizeof(coeff));
876 for (mfxU32 i = 0; i < num_coeff; i++)
877 {
878 coeff[scan[i]] = level* GetRandomGen().GetRandomNumber(0, 256) / 256 - level / 2;
879 }
880 if (m_ProcMode == GENERATE)
881 {
882 Inverse4x4(coeff, 4, pixels, 4);
883 for (mfxU32 i = 0; i < 16; i++)
884 {
885 block[i] = ClipIntToChar(128 + pixels[i]);
886 }
887 }
888
889 return;
890 }
891
FillDeltaBlocks4x4(mfxI8 blockL0[16],mfxI8 blockL1[16])892 void FrameProcessor::FillDeltaBlocks4x4(mfxI8 blockL0[16], mfxI8 blockL1[16])
893 {
894 if (blockL0 == nullptr || blockL1 == nullptr)
895 {
896 throw std::string("ERROR: FillDeltaBlocks4x4: null pointer reference");
897 }
898
899 for (mfxU32 i = 0; i < 16; i++)
900 {
901 blockL0[i] = GetRandomGen().GetRandomNumber(0, DELTA_PIXEL_BI_DIRECT);
902 blockL1[i] = -blockL0[i];
903 }
904
905
906 return;
907 }
908
ApplyDeltaPerPixel(const PUBlock & PU,const mfxI8 deltaBlock[16],const mfxU8 inBlock[16],mfxU8 outBlock[16])909 void FrameProcessor::ApplyDeltaPerPixel(const PUBlock & PU, const mfxI8 deltaBlock[16], const mfxU8 inBlock[16], mfxU8 outBlock[16])
910 {
911 if (inBlock == nullptr || outBlock == nullptr)
912 {
913 throw std::string("ERROR: ApplyDeltaPerPixel: null pointer reference");
914 }
915
916 for (mfxU8 i = 0; i < 16; i++)
917 {
918 outBlock[i] = ClipIntToChar(inBlock[i] + deltaBlock[i]);
919 }
920
921 return;
922 }
923
ClipIntToChar(mfxI32 x)924 mfxU8 FrameProcessor::ClipIntToChar(mfxI32 x)
925 {
926 if (x < 0)
927 return 0;
928 else if (x > 255)
929 return 255;
930
931 return (mfxU8)x;
932 }
933
934 //Perform inverse 4x4 DCT
Inverse4x4(mfxI32 * src,mfxU32 s_pitch,mfxI32 * dst,mfxU32 d_pitch)935 void FrameProcessor::Inverse4x4(mfxI32 *src, mfxU32 s_pitch, mfxI32 *dst, mfxU32 d_pitch)
936 {
937 if (src == nullptr || dst == nullptr)
938 {
939 throw std::string("ERROR: Inverse4x4: null pointer reference");
940 }
941 const mfxU32 BLOCK_SIZE = 4;
942 mfxI32 tmp[16];
943 mfxI32 *pTmp = tmp, *pblock;
944 mfxI32 p0, p1, p2, p3;
945 mfxI32 t0, t1, t2, t3;
946
947 // Horizontal
948 for (mfxU32 i = 0; i < BLOCK_SIZE; i++)
949 {
950 pblock = src + i*s_pitch;
951 t0 = *(pblock++);
952 t1 = *(pblock++);
953 t2 = *(pblock++);
954 t3 = *(pblock);
955
956 p0 = t0 + t2;
957 p1 = t0 - t2;
958 p2 = (t1 >> 1) - t3;
959 p3 = t1 + (t3 >> 1);
960
961 *(pTmp++) = p0 + p3;
962 *(pTmp++) = p1 + p2;
963 *(pTmp++) = p1 - p2;
964 *(pTmp++) = p0 - p3;
965 }
966
967 // Vertical
968 for (mfxU32 i = 0; i < BLOCK_SIZE; i++)
969 {
970 pTmp = tmp + i;
971 t0 = *pTmp;
972 t1 = *(pTmp += BLOCK_SIZE);
973 t2 = *(pTmp += BLOCK_SIZE);
974 t3 = *(pTmp += BLOCK_SIZE);
975
976 p0 = t0 + t2;
977 p1 = t0 - t2;
978 p2 = (t1 >> 1) - t3;
979 p3 = t1 + (t3 >> 1);
980
981 *(dst + 0 * d_pitch + i) = p0 + p3;
982 *(dst + 1 * d_pitch + i) = p1 + p2;
983 *(dst + 2 * d_pitch + i) = p1 - p2;
984 *(dst + 3 * d_pitch + i) = p0 - p3;
985 }
986 }
987
PutBlock4x4(mfxU32 x0,mfxU32 y0,mfxU8 block[16],mfxFrameSurface1 * surf)988 void FrameProcessor::PutBlock4x4(mfxU32 x0, mfxU32 y0, mfxU8 block[16], mfxFrameSurface1* surf)
989 {
990 if (surf == nullptr)
991 {
992 throw std::string("ERROR: PutBlock4x4: null pointer reference");
993 }
994 //put block in the current frame, x0, y0 pixel coordinates
995 for (mfxU32 y = 0; y < 4; y++)
996 {
997 for (mfxU32 x = 0; x < 4; x++)
998 {
999 *(surf->Data.Y + (y0 + y)* surf->Data.Pitch + x0 + x) = block[4 * y + x];
1000 *(surf->Data.U + ((y0 + y) / 2) * (surf->Data.Pitch / 2) + (x0 + x) / 2) = CHROMA_DEFAULT;
1001 *(surf->Data.V + ((y0 + y) / 2) * (surf->Data.Pitch / 2) + (x0 + x) / 2) = CHROMA_DEFAULT;
1002 }
1003 }
1004 return;
1005 }
1006
1007
1008 // Trace back pixels from block BP shifted by MV coordinates on GEN frame surf_from to MOD frame surf_to
1009 //
1010 // bp - block on MOD frame
1011 // mv - it's shift on GEN frame
1012 // surf_from - GEN frame surf
1013 // surf_to - MOD frame surf
1014
TraceBackAndPutBlockIntoFrame(const PUBlock & PU,FrameChangeDescriptor & descr)1015 void FrameProcessor::TraceBackAndPutBlockIntoFrame(const PUBlock & PU, FrameChangeDescriptor & descr)
1016 {
1017 std::pair<mfxU32, mfxU32> fractOffsetL0(PU.m_MV.MV[0].x & 3, PU.m_MV.MV[0].y & 3);
1018 std::pair<mfxU32, mfxU32> fractOffsetL1(PU.m_MV.MV[1].x & 3, PU.m_MV.MV[1].y & 3);
1019
1020 auto itL0 = std::next(descr.m_refDescrList0.begin(), PU.m_MV.RefIdx.RefL0);
1021 auto itL1 = std::next(descr.m_refDescrList1.begin(), PU.m_MV.RefIdx.RefL1);
1022
1023 mfxFrameSurface1* surfDest = nullptr;
1024 mfxFrameSurface1* surfL0Ref = nullptr;
1025 mfxFrameSurface1* surfL1Ref = nullptr;
1026
1027 surfDest = descr.m_frame;
1028
1029 if (itL0 != descr.m_refDescrList0.end())
1030 {
1031 surfL0Ref = itL0->m_frame;
1032 }
1033 else
1034 {
1035 throw("ERROR: TraceBackAndPutBlockIntoFrame: L0 ref not found");
1036 }
1037
1038 if (descr.m_frameType & MFX_FRAMETYPE_B)
1039 {
1040 if (itL1 != descr.m_refDescrList1.end())
1041 {
1042 surfL1Ref = itL1->m_frame;
1043 }
1044 else
1045 {
1046 throw("ERROR: TraceBackAndPutBlockIntoFrame: L1 ref not found");
1047 }
1048 }
1049
1050 PatchBlock outPatch(PU);
1051 InterpolWorkBlock workBlockL0;
1052 InterpolWorkBlock workBlockL1;
1053
1054 if (PU.predFlagL0 && PU.predFlagL1)
1055 {
1056 workBlockL0 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L0), fractOffsetL0, surfL0Ref);
1057 workBlockL1 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L1), fractOffsetL1, surfL1Ref);
1058 outPatch = ApplyDefaultWeightedPrediction(workBlockL0, workBlockL1);
1059 }
1060 else if (PU.predFlagL0)
1061 {
1062 workBlockL0 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L0), fractOffsetL0, surfL0Ref);
1063 outPatch = ApplyDefaultWeightedPrediction(workBlockL0);
1064 }
1065 else if (PU.predFlagL1)
1066 {
1067 workBlockL1 = GetInterpolWorkBlockPreWP(PU.GetShiftedBaseBlock(L1), fractOffsetL1, surfL1Ref);
1068 outPatch = ApplyDefaultWeightedPrediction(workBlockL1);
1069 }
1070 else
1071 throw("ERROR: TraceBackAndPutBlockIntoFrame: predFlagL0 and predFlagL1 are equal 0");
1072
1073 //Adjust outPatch coords so that they correspond to the unshifted PU
1074 outPatch.m_AdrX = PU.m_AdrX;
1075 outPatch.m_AdrY = PU.m_AdrY;
1076
1077 PutPatchIntoFrame(outPatch, *surfDest);
1078
1079 return;
1080 }
1081
1082
GetInterpolWorkBlockPreWP(const BaseBlock & blockFrom,std::pair<mfxU32,mfxU32> fractOffset,mfxFrameSurface1 * surfFrom)1083 InterpolWorkBlock FrameProcessor::GetInterpolWorkBlockPreWP(const BaseBlock & blockFrom, std::pair<mfxU32, mfxU32> fractOffset, mfxFrameSurface1* surfFrom)
1084 {
1085 if (surfFrom == nullptr)
1086 {
1087 throw std::string("ERROR: GetInterpolWorkBlockPreWP: null pointer reference");
1088 }
1089 InterpolWorkBlock workBlock(blockFrom);
1090
1091 //Luma
1092 for (mfxU32 i = 0; i < blockFrom.m_BHeight; i++)
1093 {
1094 for (mfxU32 j = 0; j < blockFrom.m_BWidth; j++)
1095 {
1096 mfxU32 offset = i * blockFrom.m_BWidth + j;
1097 workBlock.m_YArr[offset] = CalculateLumaPredictionSamplePreWP(
1098 std::make_pair(blockFrom.m_AdrX + j, blockFrom.m_AdrY + i), fractOffset, surfFrom);
1099 }
1100 }
1101
1102 // Chroma(YV12 / I420 only) - TODO: enable correct interpolation for chroma
1103 //NB: MFX_FOURCC_YV12 is an umbrella designation for both YV12 and I420 here, as
1104 //the process of copying pixel values in memory is the same
1105
1106 //TODO: implement proper chroma interpolation
1107 if (surfFrom->Info.FourCC == MFX_FOURCC_YV12)
1108 {
1109 for (mfxU32 i = 0; i < blockFrom.m_BHeight / 2; ++i)
1110 {
1111 for (mfxU32 j = 0; j < blockFrom.m_BWidth / 2; j++)
1112 {
1113 mfxU32 offsetChr = i * blockFrom.m_BWidth / 2 + j;
1114 workBlock.m_UArr[offsetChr] = surfFrom->Data.U[
1115 (blockFrom.m_AdrY / 2 + i) * surfFrom->Data.Pitch / 2 + blockFrom.m_AdrX / 2 + j];
1116 workBlock.m_VArr[offsetChr] = surfFrom->Data.V[
1117 (blockFrom.m_AdrY / 2 + i) * surfFrom->Data.Pitch / 2 + blockFrom.m_AdrX / 2 + j];
1118
1119 //For now, just scale uninterpolated chroma so that we can call default weighted prediction
1120 //on chroma components the same way we do with luma
1121 workBlock.m_UArr[offsetChr] <<= 6;
1122 workBlock.m_VArr[offsetChr] <<= 6;
1123 }
1124 }
1125 }
1126
1127 return workBlock;
1128 }
1129
ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockLx)1130 PatchBlock FrameProcessor::ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockLx)
1131 {
1132 PatchBlock outPatch(static_cast<BaseBlock>(workBlockLx));
1133
1134 //Luma
1135 for (mfxU32 i = 0; i < outPatch.m_BHeight * outPatch.m_BWidth; i++)
1136 {
1137 outPatch.m_YPlane[i] = GetDefaultWeightedPredSample(workBlockLx.m_YArr[i]);
1138 }
1139
1140 //Chroma
1141 for (mfxU32 i = 0; i < outPatch.m_BHeight / 2 * outPatch.m_BWidth / 2; i++)
1142 {
1143 outPatch.m_UPlane[i] = GetDefaultWeightedPredSample(workBlockLx.m_UArr[i]);
1144 outPatch.m_VPlane[i] = GetDefaultWeightedPredSample(workBlockLx.m_VArr[i]);
1145 }
1146 return outPatch;
1147 }
1148
ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockL0,InterpolWorkBlock & workBlockL1)1149 PatchBlock FrameProcessor::ApplyDefaultWeightedPrediction(InterpolWorkBlock & workBlockL0, InterpolWorkBlock & workBlockL1)
1150 {
1151 if (workBlockL0.m_BHeight != workBlockL1.m_BHeight || workBlockL0.m_BWidth != workBlockL1.m_BWidth)
1152 {
1153 throw std::string("ERROR: ApplyDefaultWeightedPrediction: InterpolWorkBlocks for bi-prediction must have same size");
1154 }
1155 PatchBlock outPatch(static_cast<BaseBlock>(workBlockL0));
1156 for (mfxU32 i = 0; i < outPatch.m_BHeight * outPatch.m_BWidth; i++)
1157 {
1158 outPatch.m_YPlane[i] = GetDefaultWeightedPredSample(workBlockL0.m_YArr[i], workBlockL1.m_YArr[i]);
1159 }
1160
1161 for (mfxU32 i = 0; i < outPatch.m_BHeight / 2 * outPatch.m_BWidth / 2; i++)
1162 {
1163 outPatch.m_UPlane[i] = GetDefaultWeightedPredSample(workBlockL0.m_UArr[i], workBlockL1.m_UArr[i]);
1164 outPatch.m_VPlane[i] = GetDefaultWeightedPredSample(workBlockL0.m_VArr[i], workBlockL1.m_VArr[i]);
1165 }
1166
1167 return outPatch;
1168 }
1169
SetCorrectMVPBlockSize(mfxU8 mvpBlockSizeParam)1170 mfxU8 FrameProcessor::SetCorrectMVPBlockSize(mfxU8 mvpBlockSizeParam)
1171 {
1172 if (!mvpBlockSizeParam)
1173 {
1174 switch (m_CTUStr.CTUSize)
1175 {
1176 case 16:
1177 return 1;
1178 case 32:
1179 return 2;
1180 case 64:
1181 return 3;
1182 default:
1183 break;
1184 }
1185 }
1186 return mvpBlockSizeParam;
1187 }
1188
1189 // Returns predicted luma value (Y) for sample with provided location on reference frame given in quarter-pixel units
1190 //
1191 // refSamplePositionFull - (xInt,yInt) Luma location on the reference frame given in full-sample units. Assumed (x,y) has correct value.
1192 // refSamplePositionFract - (xFract,yFract) Luma location on the reference frame given in quarter-sample units.
1193 // refSurface - reference frame, containing luma samples
1194 // Luma interpolation process described in H265 standard (p.163 - 165)
1195
CalculateLumaPredictionSamplePreWP(const std::pair<mfxU32,mfxU32> & refSamplePositionFull,const std::pair<mfxU32,mfxU32> & refSamplePositionFract,mfxFrameSurface1 * refSurface)1196 mfxI32 FrameProcessor::CalculateLumaPredictionSamplePreWP(const std::pair<mfxU32, mfxU32>& refSamplePositionFull,
1197 const std::pair<mfxU32, mfxU32>& refSamplePositionFract, mfxFrameSurface1 * refSurface)
1198 {
1199 mfxU32 xFull = refSamplePositionFull.first;
1200 mfxU32 yFull = refSamplePositionFull.second;
1201 mfxU32 xFract = refSamplePositionFract.first;
1202 mfxU32 yFract = refSamplePositionFract.second;
1203
1204 // These shift variables used below are specified in H265 spec for 8 bit Luma depth
1205 // shift1 := 0
1206 // shift2 := 6
1207 // shift3 := 6
1208
1209 // Stores output of the sub-sample filtering process
1210 mfxI32 interpolatedSample = 0;
1211
1212 /*
1213 // Integer and quarter sample positions used for interpolation
1214
1215 A-10 O O O | A00 a00 b00 c00 | A10 O O O A20
1216 d-10 O O O | d00 e00 f00 g00 | d10 O O O d20
1217 h-10 O O O | h00 i00 j00 k00 | h10 O O O h20
1218 n-10 O O O | n00 p00 q00 r00 | n10 O O O n20
1219 */
1220
1221 switch (xFract)
1222 {
1223 case 0:
1224 switch (yFract)
1225 {
1226 case 0:
1227 // A << shift3
1228 interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1229 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[0]) * 64;
1230 break;
1231 case 1:
1232 // d00 := (-A(0,-3) + 4*A(0,-2) - 10*A(0,-1) + 58*A(0,0) + 17*A(0,1) - 5*A(0,2) + A(0,3)) >> shift1
1233 interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1234 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[1]);
1235 break;
1236 case 2:
1237 // h00 := (-A(0,-3) + 4*A(0,-2) - 11*A(0,-1) + 40*A(0,0) + 40*A(0,1) - 11*A(0,2) + 4*A(0,3) - A(0,4)) >> shift1
1238 interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1239 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[2]);
1240 break;
1241 case 3:
1242 // n00 := (A(0,-2) - 5*A(0,-1) + 17*A(0,0) + 58*A(0,1) - 10*A(0,2) + 4*A(0,3) - A(0,4)) >> shift1
1243 interpolatedSample = ApplyVerticalSubSampleLumaFilter(xFull, yFull,
1244 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[3]);
1245 break;
1246 default:
1247 break;
1248 }
1249 break;
1250 case 1:
1251 {
1252 // a0i, where i = -3..4
1253 // a0i = [a(0,-3) a(0,-2) a(0,-1) a(0,0) a(0,1) a(0,2) a(0,3) a(0,4)]
1254 std::vector<mfxI32> fractUtilSamples;
1255 fractUtilSamples.reserve(LUMA_TAPS_NUMBER);
1256
1257 for (mfxI32 i = 0; i < LUMA_TAPS_NUMBER; i++)
1258 {
1259 // a0i := (-A(-3,i) + 4*A(-2,i) - 10*A(-1,i) + 58*A(0,i) + 17*A(1,i) - 5*A(2,i) + A(3,i) >> shift1)
1260 fractUtilSamples.push_back(ApplyHorizontalSubSampleLumaFilter(xFull,
1261 yFull + (i - 3), refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[1]));
1262 }
1263
1264 switch (yFract)
1265 {
1266 case 0:
1267 // a00 := a(0,0)
1268 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1269 LUMA_SUBSAMPLE_FILTER_COEFF[0], 0);
1270 break;
1271 case 1:
1272 // e00 := (-a(0,-3) + 4*a(0,-2) - 10*a(0,-1) + 58*a(0,0) + 17*a(0,1) - 5*a(0,2) + a(0,3) >> shift2)
1273 interpolatedSample = std::inner_product(fractUtilSamples.begin(),
1274 fractUtilSamples.end(), LUMA_SUBSAMPLE_FILTER_COEFF[1], 0) / 64;
1275 break;
1276 case 2:
1277 // i00 := (-a(0,-3) + 4*a(0,-2) - 11*a(0,-1) + 40*a(0,0) + 40*a(0,1) - 11*a(0,2) + 4*a(0,3) - a(0,4) >> shift2)
1278 interpolatedSample = std::inner_product(fractUtilSamples.begin(),
1279 fractUtilSamples.end(), LUMA_SUBSAMPLE_FILTER_COEFF[2], 0) / 64;
1280 break;
1281 case 3:
1282 // p00 := (a(0,-2) - 5*a(0,-1) + 17*a(0,0) + 58*a(0,1) - 10*a(0,2) + 4*a(0,3) - a(0,4) >> shift2)
1283 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1284 LUMA_SUBSAMPLE_FILTER_COEFF[3], 0) / 64;
1285 break;
1286 default:
1287 break;
1288 }
1289 break;
1290 }
1291 case 2:
1292 {
1293 // b0i, where i = -3..4
1294 // b0i = [b(0,-3) b(0,-2) b(0,-1) b(0,0) b(0,1) b(0,2) b(0,3) b(0,4)]
1295 std::vector<mfxI32> fractUtilSamples;
1296 fractUtilSamples.reserve(LUMA_TAPS_NUMBER);
1297
1298 for (mfxI32 i = 0; i < LUMA_TAPS_NUMBER; i++)
1299 {
1300 // b0i := (-A(-3,i) + 4*A(-2,i) - 11*A(-1,i) + 40*A(0,i) + 40*A(1,i) - 11*A(2,i) + 4*A(3,i) - A(4,i) >> shift1)
1301 fractUtilSamples.push_back(ApplyHorizontalSubSampleLumaFilter(xFull, yFull + (i - 3),
1302 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[2]));
1303 }
1304
1305 switch (yFract)
1306 {
1307 case 0:
1308 // b00 := (-A(-3,0) + 4*A(-2,0) - 11*A(-1,0) + 40*A(0,0) + 40*A(1,0) - 11*A(2,0) + 4*A(3,0) - A(4,0) >> shift1)
1309 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1310 LUMA_SUBSAMPLE_FILTER_COEFF[0], 0);
1311 break;
1312 case 1:
1313 // f00 := (-b(0,-3) + 4*b(0,-2) - 10*b(0,-1) + 58*b(0,0) + 17*b(0,1) - 5*b(0,2) + b(0,3) >> shift2)
1314 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1315 LUMA_SUBSAMPLE_FILTER_COEFF[1], 0) / 64;
1316 break;
1317 case 2:
1318 // j00 := (-b(0,-3) + 4*b(0,-2) - 11*b(0,-1) + 40*b(0,0) + 40*b(0,1) - 11*b(0,2) + 4*b(0,3) - b(0,4) >> shift2)
1319 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1320 LUMA_SUBSAMPLE_FILTER_COEFF[2], 0) / 64;
1321 break;
1322 case 3:
1323 // q00 := (b(0,-2) - 5*b(0,-1) + 17*b(0,0) + 58*b(0,1) - 10*b(0,2) + 4*b(0,3) - b(0,4) >> shift2)
1324 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1325 LUMA_SUBSAMPLE_FILTER_COEFF[3], 0) / 64;
1326 break;
1327 default:
1328 break;
1329 }
1330 break;
1331 }
1332 case 3:
1333 {
1334 // c0i, where i = -3..4
1335 // c0i = [c(0,-3) c(0,-2) c(0,-1) c(0,0) c(0,1) c(0,2) c(0,3) c(0,4)]
1336 std::vector<mfxI32> fractUtilSamples;
1337 fractUtilSamples.reserve(LUMA_TAPS_NUMBER);
1338
1339 for (mfxI32 i = 0; i < LUMA_TAPS_NUMBER; i++)
1340 {
1341 // c0i := (A(-2,i) - 5*A(-1,i) + 17*A(0,i) + 58*A(1,i) - 10*A(2,i) + 4*A(3,i) - A(4,i) >> shift1)
1342 fractUtilSamples.push_back(ApplyHorizontalSubSampleLumaFilter(xFull, yFull + (i - 3),
1343 refSurface, LUMA_SUBSAMPLE_FILTER_COEFF[3]));
1344 }
1345
1346 switch (yFract)
1347 {
1348 case 0:
1349 // c00 := c(0,0)
1350 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1351 LUMA_SUBSAMPLE_FILTER_COEFF[0], 0);
1352 break;
1353 case 1:
1354 // g00 := (-c(0,-3) + 4*c(0,-2) - 10*c(0,-1) + 58*c(0,0) + 17*c(0,1) - 5*c(0,2) + c(0,3) >> shift2)
1355 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1356 LUMA_SUBSAMPLE_FILTER_COEFF[1], 0) / 64;
1357 break;
1358 case 2:
1359 // k00 := (-c(0,-3) + 4*c(0,-2) - 11*c(0,-1) + 40*c(0,0) + 40*c(0,1) - 11*c(0,2) + 4*c(0,3) - c(0,4) >> shift2)
1360 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1361 LUMA_SUBSAMPLE_FILTER_COEFF[2], 0) / 64;
1362 break;
1363 case 3:
1364 // r00 := (c(0,-2) - 5*c(0,-1) + 17*c(0,0) + 58*c(0,1) - 10*c(0,2) + 4*c(0,3) - c(0,4) >> shift2)
1365 interpolatedSample = std::inner_product(fractUtilSamples.begin(), fractUtilSamples.end(),
1366 LUMA_SUBSAMPLE_FILTER_COEFF[3], 0) / 64;
1367 break;
1368 default:
1369 break;
1370 }
1371 break;
1372 }
1373 default:
1374 break;
1375 }
1376
1377 return interpolatedSample;
1378 }
1379
ApplyVerticalSubSampleLumaFilter(mfxU32 x,mfxU32 y,mfxFrameSurface1 * refSurface,const mfxI32 * coeff)1380 mfxI32 FrameProcessor::ApplyVerticalSubSampleLumaFilter(mfxU32 x, mfxU32 y, mfxFrameSurface1 * refSurface, const mfxI32 * coeff)
1381 {
1382 mfxI32 sum = 0;
1383
1384 for (mfxI32 i = -3; i <= 4; i++)
1385 {
1386 sum += coeff[i + 3] * GetClippedSample(COLOR_COMPONENT::LUMA_Y, (mfxI32)x, (mfxI32)y + i, refSurface);
1387 }
1388
1389 return sum;
1390 }
1391
ApplyHorizontalSubSampleLumaFilter(mfxU32 x,mfxU32 y,mfxFrameSurface1 * refSurface,const mfxI32 * coeff)1392 mfxI32 FrameProcessor::ApplyHorizontalSubSampleLumaFilter(mfxU32 x, mfxU32 y, mfxFrameSurface1 * refSurface, const mfxI32 * coeff)
1393 {
1394 mfxI32 sum = 0;
1395
1396 for (mfxI32 i = -3; i <= 4; i++)
1397 {
1398 sum += coeff[i + 3] * GetClippedSample(COLOR_COMPONENT::LUMA_Y, (mfxI32)x + i, (mfxI32)y, refSurface);
1399 }
1400
1401 return sum;
1402 }
1403
1404 // Release processed surfaces
1405
UnlockSurfaces(FrameChangeDescriptor & frame_descr)1406 void FrameProcessor::UnlockSurfaces(FrameChangeDescriptor & frame_descr)
1407 {
1408 frame_descr.m_frame->Data.Locked = 0;
1409
1410 // Unlock all reference GEN frames recursively
1411 if (frame_descr.m_refDescrList0.empty() && frame_descr.m_refDescrList1.empty()) return;
1412 else
1413 {
1414 for (auto & ref_descr : frame_descr.m_refDescrList0)
1415 {
1416 UnlockSurfaces(ref_descr);
1417 }
1418 for (auto & ref_descr : frame_descr.m_refDescrList1)
1419 {
1420 UnlockSurfaces(ref_descr);
1421 }
1422 }
1423
1424 return;
1425 }
1426
1427 //this function has the same behavior for any color component
1428 //it is more convenient to have input in the coordinates of colorComp component space
FillIntraRefSamples(mfxU32 cSize,mfxU32 cAdrX,mfxU32 cAdrY,const PatchBlock & frame,COLOR_COMPONENT colorComp,std::vector<mfxU8> & refSamples)1429 void FrameProcessor::FillIntraRefSamples(mfxU32 cSize, mfxU32 cAdrX, mfxU32 cAdrY, const PatchBlock& frame, COLOR_COMPONENT colorComp, std::vector<mfxU8>& refSamples)
1430 {
1431 refSamples.clear();
1432 const mfxU32 NO_SAMPLES_AVAILABLE = 0xffffffff;
1433 mfxU8 prevSampleAvail = 128; //default ref sample value is 128 if no real ref samples are available
1434 mfxU32 firstSampleAvailPos = NO_SAMPLES_AVAILABLE; //position of the first available ref sample in refSamples
1435
1436 //fill vertical part
1437 mfxU32 currCAdrX = cAdrX - 1;
1438 mfxU32 currCAdrY = cAdrY + 2 * cSize - 1;
1439
1440 for (mfxU32 i = 0; i < 2 * cSize + 1; i++, currCAdrY--)
1441 {
1442 if (IsSampleAvailable(currCAdrX, currCAdrY))
1443 {
1444 prevSampleAvail = frame.GetSampleI420(colorComp, currCAdrX, currCAdrY);
1445 refSamples.push_back(prevSampleAvail);
1446 if (firstSampleAvailPos == NO_SAMPLES_AVAILABLE)
1447 {
1448 firstSampleAvailPos = i;
1449 }
1450 }
1451 else
1452 {
1453 refSamples.push_back(prevSampleAvail);
1454 }
1455 }
1456 currCAdrX = cAdrX;
1457 currCAdrY = cAdrY - 1;
1458
1459 //fill horizontal part
1460 for (mfxU32 i = 2 * cSize + 1; i < 4 * cSize + 1; i++, currCAdrX++)
1461 {
1462 if (IsSampleAvailable(currCAdrX, currCAdrY))
1463 {
1464 prevSampleAvail = frame.GetSampleI420(colorComp, currCAdrX, currCAdrY);
1465 refSamples.push_back(prevSampleAvail);
1466 if (firstSampleAvailPos == NO_SAMPLES_AVAILABLE)
1467 {
1468 firstSampleAvailPos = i;
1469 }
1470 }
1471 else
1472 {
1473 refSamples.push_back(prevSampleAvail);
1474 }
1475 }
1476 //fill initial part with with first available ref sample value
1477 if (firstSampleAvailPos != NO_SAMPLES_AVAILABLE)
1478 {
1479 std::fill(refSamples.begin(), refSamples.begin() + firstSampleAvailPos, refSamples[firstSampleAvailPos]);
1480 }
1481 }
1482
ChooseFilter(std::vector<mfxU8> & RefSamples,mfxU8 size,INTRA_MODE mode)1483 FILTER_TYPE FrameProcessor::ChooseFilter(std::vector<mfxU8>& RefSamples, mfxU8 size, INTRA_MODE mode) {
1484 FILTER_TYPE filter = NO_FILTER;
1485 if (mode == DC || size == 4)
1486 return filter;
1487 switch (size) {
1488 case 8:
1489 if (mode == 2 || mode == 18 || mode == 34)
1490 filter = THREE_TAP_FILTER;
1491 break;
1492 case 16:
1493 filter = THREE_TAP_FILTER;
1494 if ((mode > 8 && mode < 12) || (mode > 24 && mode < 28))
1495 filter = NO_FILTER;
1496 break;
1497 case 32:
1498 filter = THREE_TAP_FILTER;
1499 if (mode == ANG10_HOR || mode == ANG26_VER)
1500 filter = NO_FILTER;
1501 else if (std::abs(RefSamples[0] + RefSamples[2 * size] - 2 * RefSamples[size]) < 8 &&
1502 std::abs(RefSamples[2 * size] + RefSamples[4 * size] - 2 * RefSamples[3 * size]) < 8)
1503 filter = STRONG_INTRA_SMOOTHING_FILTER;
1504 break;
1505 default:
1506 break;
1507 }
1508 return filter;
1509 }
1510
ThreeTapFilter(std::vector<mfxU8> & RefSamples,mfxU8 size)1511 void FrameProcessor::ThreeTapFilter(std::vector<mfxU8>& RefSamples, mfxU8 size) {
1512 for (mfxU8 i = 1; i < (size << 2); i++)
1513 RefSamples[i] = (RefSamples[i - 1] + 2 * RefSamples[i] + RefSamples[i + 1] + 2) >> 2;
1514 }
1515
StrongFilter(std::vector<mfxU8> & RefSamples,mfxU8 size)1516 void FrameProcessor::StrongFilter(std::vector<mfxU8>& RefSamples, mfxU8 size) {
1517 for (mfxU8 i = 1; i < 2 * size; i++)
1518 RefSamples[i] = (i * RefSamples[2 * size] + (2 * size - i) * RefSamples[0] + 32) >> 6;
1519 for (mfxU8 i = 1; i < 2 * size; i++)
1520 RefSamples[2 * size + i] = ((2 * size - i) * RefSamples[2 * size] + i * RefSamples[4 * size] + 32) >> 6;
1521 }
1522
MakeFilter(std::vector<mfxU8> & RefSamples,mfxU8 size,INTRA_MODE mode)1523 FILTER_TYPE FrameProcessor::MakeFilter(std::vector<mfxU8>& RefSamples, mfxU8 size, INTRA_MODE mode) {
1524 FILTER_TYPE filter = ChooseFilter(RefSamples, size, mode);
1525 switch (filter) {
1526 case NO_FILTER:
1527 break;
1528 case THREE_TAP_FILTER:
1529 ThreeTapFilter(RefSamples, size);
1530 break;
1531 case STRONG_INTRA_SMOOTHING_FILTER:
1532 StrongFilter(RefSamples, size);
1533 break;
1534 default:
1535 break;
1536 }
1537 return filter;
1538 }
1539
MakeProjRefArray(const std::vector<mfxU8> & RefSamples,mfxU8 size,const IntraParams & IntraMode,std::vector<mfxU8> & ProjRefSamples)1540 mfxU8 FrameProcessor::MakeProjRefArray(const std::vector<mfxU8>& RefSamples, mfxU8 size, const IntraParams& IntraMode, std::vector<mfxU8>& ProjRefSamples)
1541 {
1542 mfxU8 NumProj = 0;
1543
1544 if (IntraMode.direction == HORIZONTAL)
1545 {
1546 ProjRefSamples.insert(ProjRefSamples.end(), RefSamples.begin(), RefSamples.begin() + 2 * size + 1);
1547 if (IntraMode.intraPredAngle < 0)
1548 {
1549 if (IntraMode.invAngle == 0)
1550 {
1551 throw std::string("ERROR: MakeProjRefArray: invAngle == 0 for angular mode with intraPredAngle < 0");
1552 }
1553 mfxI8 y = -1;
1554 mfxI32 sampleForProjectionPos = 2 * size + ((y * IntraMode.invAngle + 128) >> 8);
1555 while (sampleForProjectionPos < 4 * size + 1)
1556 {
1557 ProjRefSamples.push_back(RefSamples[sampleForProjectionPos]);
1558 sampleForProjectionPos = 2 * size + ((--y * IntraMode.invAngle + 128) >> 8);
1559 }
1560 }
1561 std::reverse(ProjRefSamples.begin(), ProjRefSamples.end());
1562 NumProj = (mfxU8)(ProjRefSamples.size() - 2 * size - 1);
1563 }
1564 else if (IntraMode.direction == VERTICAL)
1565 {
1566 if (IntraMode.intraPredAngle < 0)
1567 {
1568 if (IntraMode.invAngle == 0)
1569 {
1570 throw std::string("ERROR: MakeProjRefArray: invAngle == 0 for angular mode with intraPredAngle < 0");
1571 }
1572 mfxI8 x = -1;
1573 mfxI32 sampleForProjectionPos = 2 * size - ((x * IntraMode.invAngle + 128) >> 8);
1574
1575 while (sampleForProjectionPos > -1)
1576 {
1577 ProjRefSamples.push_back(RefSamples[sampleForProjectionPos]);
1578 sampleForProjectionPos = 2 * size - ((--x * IntraMode.invAngle + 128) >> 8);
1579 }
1580
1581 std::reverse(ProjRefSamples.begin(), ProjRefSamples.end());
1582 }
1583
1584 NumProj = (mfxU8)ProjRefSamples.size();
1585 ProjRefSamples.insert(ProjRefSamples.end(), RefSamples.begin() + 2 * size, RefSamples.end());
1586 }
1587 return NumProj;
1588 }
1589
PlanarPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 size,mfxU8 * patch)1590 void FrameProcessor::PlanarPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 size, mfxU8 * patch)
1591 {
1592 if (patch == nullptr)
1593 {
1594 throw std::string("ERROR: PlanarPrediction: pointer to buffer is null\n");
1595 }
1596
1597 for (mfxI32 y = 0; y < size; y++)
1598 {
1599 for (mfxI32 x = 0; x < size; x++)
1600 {
1601 patch[y * size + x] = (
1602 (size - 1 - x) * RefSamples[2 * size - 1 - y]
1603 + (x + 1) * RefSamples[3 * size + 1]
1604 + (size - 1 - y) * RefSamples[2 * size + 1 + x]
1605 + (y + 1) * RefSamples[size - 1]
1606 + size) / (size * 2);
1607 }
1608 }
1609
1610 }
1611
DCPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 size,mfxU8 * patch)1612 void FrameProcessor::DCPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 size, mfxU8 * patch)
1613 {
1614 if (patch == nullptr)
1615 {
1616 throw std::string("ERROR: DCPrediction: pointer to buffer is null\n");
1617 }
1618
1619 mfxU32 DCValue = size;
1620 for (mfxI32 i = 0; i < size; i++)
1621 {
1622 DCValue += RefSamples[2 * size - 1 - i] + RefSamples[i + 2 * size + 1];
1623 }
1624 DCValue /= 2 * size;
1625 memset(patch, DCValue, size*size);
1626 }
1627
AngularPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 size,IntraParams & params,mfxU8 * patch)1628 void FrameProcessor::AngularPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 size, IntraParams& params, mfxU8 * patch) {
1629 if (patch == nullptr)
1630 {
1631 throw std::string("ERROR: AngularPrediction: pointer to buffer is null\n");
1632 }
1633
1634 std::vector<mfxU8> ProjRefSamples;
1635 mfxU8 NumProj = MakeProjRefArray(RefSamples, size, params, ProjRefSamples);
1636 if (params.direction == HORIZONTAL)
1637 for (mfxI32 y = 0; y < size; y++)
1638 for (mfxI32 x = 0; x < size; x++) {
1639 mfxI32 f = ((x + 1) * params.intraPredAngle) & 31;
1640 mfxI32 i = ((x + 1) * params.intraPredAngle) >> 5;
1641 if (f != 0)
1642 patch[y * size + x] = ((32 - f) * ProjRefSamples[y + i + 1 + NumProj] + f * ProjRefSamples[y + i + 2 + NumProj] + 16) >> 5;
1643 else
1644 patch[y * size + x] = ProjRefSamples[y + i + 1 + NumProj];
1645 }
1646 else
1647 for (mfxI32 y = 0; y < size; y++)
1648 for (mfxI32 x = 0; x < size; x++) {
1649 mfxI32 f = ((y + 1) * params.intraPredAngle) & 31;
1650 mfxI32 i = ((y + 1) * params.intraPredAngle) >> 5;
1651 if (f != 0)
1652 patch[y * size + x] = ((32 - f) * ProjRefSamples[x + i + 1 + NumProj] + f * ProjRefSamples[x + i + 2 + NumProj] + 16) >> 5;
1653 else
1654 patch[y * size + x] = ProjRefSamples[x + i + 1 + NumProj];
1655 }
1656
1657 return;
1658 }
1659
GenerateIntraPrediction(const std::vector<mfxU8> & RefSamples,mfxU8 blockSize,INTRA_MODE currMode,mfxU8 * currPlane)1660 void FrameProcessor::GenerateIntraPrediction(const std::vector<mfxU8>& RefSamples, mfxU8 blockSize, INTRA_MODE currMode, mfxU8* currPlane)
1661 {
1662 if (currPlane == nullptr)
1663 {
1664 throw std::string("ERROR: GenerateIntraPrediction: pointer to buffer is null\n");
1665 }
1666
1667 IntraParams params(currMode);
1668
1669 switch (params.intraMode)
1670 {
1671 case PLANAR:
1672 PlanarPrediction(RefSamples, blockSize, currPlane);
1673 break;
1674 case DC:
1675 DCPrediction(RefSamples, blockSize, currPlane);
1676 break;
1677 default:
1678 AngularPrediction(RefSamples, blockSize, params, currPlane);
1679 break;
1680 }
1681 return;
1682 }
1683
MakePostFilter(const std::vector<mfxU8> & RefSamples,mfxU8 size,INTRA_MODE currMode,mfxU8 * lumaPlane)1684 void FrameProcessor::MakePostFilter(const std::vector<mfxU8>& RefSamples, mfxU8 size, INTRA_MODE currMode, mfxU8* lumaPlane)
1685 {
1686 mfxU32 DCValue = lumaPlane[0];
1687
1688 switch (currMode)
1689 {
1690 case DC:
1691 lumaPlane[0] = (RefSamples[2 * size - 1] + 2 * DCValue + RefSamples[2 * size + 1] + 2) >> 2;
1692 for (mfxI32 x = 1; x < size; x++)
1693 {
1694 lumaPlane[x] = (RefSamples[2 * size + 1 + x] + 3 * DCValue + 2) >> 2;
1695 }
1696 for (mfxI32 y = 1; y < size; y++)
1697 {
1698 lumaPlane[y * size] = (RefSamples[2 * size - 1 - y] + 3 * DCValue + 2) >> 2;
1699 }
1700 break;
1701 case ANG10_HOR:
1702 for (mfxI32 x = 0; x < size; x++)
1703 {
1704 lumaPlane[x] = ClipIntToChar(lumaPlane[x] + ((RefSamples[2 * size + 1 + x] - RefSamples[2 * size]) >> 1));
1705 }
1706 break;
1707 case ANG26_VER:
1708 for (mfxI32 y = 0; y < size; y++)
1709 {
1710 lumaPlane[y * size] = ClipIntToChar(lumaPlane[y * size] + ((RefSamples[2 * size - 1 - y] - RefSamples[2 * size]) >> 1));
1711 }
1712 break;
1713 default:
1714 return;
1715 }
1716 }
1717
PutPatchIntoFrame(const PatchBlock & Patch,mfxFrameSurface1 & surf)1718 void FrameProcessor::PutPatchIntoFrame(const PatchBlock & Patch, mfxFrameSurface1& surf) {
1719 //luma
1720 for (mfxU32 i = 0; i < Patch.m_BHeight; i++)
1721 memcpy(surf.Data.Y + (Patch.m_AdrY + i) * surf.Data.Pitch + Patch.m_AdrX, Patch.m_YPlane + i * Patch.m_BWidth, Patch.m_BWidth);
1722 //chroma U
1723 for (mfxU32 i = 0; i < Patch.m_BHeight / 2; ++i)
1724 memcpy(surf.Data.U + (Patch.m_AdrY / 2 + i) * surf.Data.Pitch / 2 + Patch.m_AdrX / 2, Patch.m_UPlane + i * (Patch.m_BWidth / 2), Patch.m_BWidth / 2);
1725 //chroma V
1726 for (mfxU32 i = 0; i < Patch.m_BHeight / 2; ++i)
1727 memcpy(surf.Data.V + (Patch.m_AdrY / 2 + i) * surf.Data.Pitch / 2 + Patch.m_AdrX / 2, Patch.m_VPlane + i * (Patch.m_BWidth / 2), Patch.m_BWidth / 2);
1728 }
1729
GetIntraPredPlane(const BaseBlock & refBlock,INTRA_MODE currMode,const PatchBlock & frame,COLOR_COMPONENT colorComp,mfxU8 * currPlane)1730 void FrameProcessor::GetIntraPredPlane(const BaseBlock& refBlock, INTRA_MODE currMode, const PatchBlock& frame, COLOR_COMPONENT colorComp, mfxU8* currPlane)
1731 {
1732 if (currPlane == nullptr)
1733 {
1734 throw std::string("ERROR: GetIntraPredPlane: pointer to buffer is null\n");
1735 }
1736 //here refBlock parameters are measured in samples of corresponding colorComp
1737 //get reference samples for current TU
1738 std::vector<mfxU8> RefSamples;
1739 //size and coords of block in current color component
1740 mfxU32 cSize = (colorComp == LUMA_Y) ? refBlock.m_BHeight : (refBlock.m_BHeight / 2);
1741 mfxU32 cAdrX = (colorComp == LUMA_Y) ? refBlock.m_AdrX : (refBlock.m_AdrX / 2);
1742 mfxU32 cAdrY = (colorComp == LUMA_Y) ? refBlock.m_AdrY : (refBlock.m_AdrY / 2);
1743
1744 FillIntraRefSamples(cSize, cAdrX, cAdrY, frame, colorComp, RefSamples);
1745
1746 // get filter, write it into buffer and make it
1747 MakeFilter(RefSamples, cSize, currMode);
1748
1749 //generate Prediction and return the output patch
1750 GenerateIntraPrediction(RefSamples, cSize, currMode, currPlane);
1751
1752 if (colorComp == LUMA_Y && cSize < 32)
1753 {
1754 MakePostFilter(RefSamples, cSize, currMode, currPlane);
1755 }
1756 }
1757
1758
// Builds the intra predicted patch for a TU.
//
// refBlock - TU to predict; carries the luma and chroma intra modes
// frame    - patch the reference samples are taken from
//
// Luma is always predicted. Chroma depends on the luma TB size:
//   size != 4 - chroma TBs of half the size are predicted together with luma;
//   size == 4 - a single chroma TB corresponds to four 4x4 luma TBs, so chroma is predicted
//               only for the lower-right of the four siblings. The result is then an 8x8 patch
//               that starts at the upper-left sibling, is initialised from `frame`, and has this
//               TU's luma inserted (see HEVC Algorithms and Architectures, p. 65).
//               The other three siblings yield a patch with luma prediction only.
PatchBlock FrameProcessor::GetIntraPatchBlock(const TUBlock& refBlock, const PatchBlock& frame)
{
    PatchBlock patch(refBlock);
    GetIntraPredPlane(refBlock, refBlock.m_IntraModeLuma, frame, LUMA_Y, patch.m_YPlane);

    const bool isLuma4x4 = (refBlock.m_BHeight == 4);
    if (!isLuma4x4)
    {
        GetIntraPredPlane(refBlock, refBlock.m_IntraModeChroma, frame, CHROMA_U, patch.m_UPlane);
        GetIntraPredPlane(refBlock, refBlock.m_IntraModeChroma, frame, CHROMA_V, patch.m_VPlane);
        return patch;
    }

    const bool isLowerRightSibling = (refBlock.m_AdrX % 8 == 4) && (refBlock.m_AdrY % 8 == 4);
    if (!isLowerRightSibling)
    {
        // chroma will be produced together with the last sibling
        return patch;
    }

    // 8x8 area covering all four siblings
    PatchBlock extendedPatch = PatchBlock(BaseBlock(refBlock.m_AdrX - 4, refBlock.m_AdrY - 4, 8, 8), frame);
    // add the luma prediction of the current (fourth) block
    extendedPatch.InsertAnotherPatch(patch);
    // 4x4 chroma blocks corresponding to the union of the four luma blocks
    GetIntraPredPlane(extendedPatch, refBlock.m_IntraModeChroma, frame, CHROMA_U, extendedPatch.m_UPlane);
    GetIntraPredPlane(extendedPatch, refBlock.m_IntraModeChroma, frame, CHROMA_V, extendedPatch.m_VPlane);
    return extendedPatch;
}
1791
MakeTUIntraPrediction(const TUBlock & refBlock,PatchBlock & targetPatch)1792 void FrameProcessor::MakeTUIntraPrediction(const TUBlock& refBlock, PatchBlock& targetPatch)
1793 {
1794 PatchBlock patch(refBlock);
1795 //now the most contrast mode is determined only for luma component, chroma mode is set equal to luma mode
1796 GetIntraPredPlane(refBlock, refBlock.m_IntraModeLuma, targetPatch, LUMA_Y, patch.m_YPlane);
1797 //write Patch into targetPatch
1798 targetPatch.InsertAnotherPatch(patch);
1799 }
1800
ApplyTUIntraPrediction(const TUBlock & block,ExtendedSurface & surf)1801 void FrameProcessor::ApplyTUIntraPrediction(const TUBlock & block, ExtendedSurface& surf)
1802 {
1803 PatchBlock framePatchBlock(BaseBlock(0, 0, surf.Info.CropW, surf.Info.CropH), surf);
1804 PatchBlock patch = GetIntraPatchBlock(block, framePatchBlock);
1805 //write Patch into frame
1806 PutPatchIntoFrame(patch, surf);
1807 }
1808
1809 //Iterates over CUs in CTU and applies intra prediction for intra CUs inside it
ApplyIntraPredInCTU(const CTUDescriptor & CTU,FrameChangeDescriptor & frame_descr)1810 void FrameProcessor::ApplyIntraPredInCTU(const CTUDescriptor & CTU, FrameChangeDescriptor & frame_descr)
1811 {
1812 for (auto& CU : CTU.m_CUVec)
1813 {
1814 if (CU.m_PredType == INTRA_PRED)
1815 {
1816 for (auto& TU : CU.m_TUVec)
1817 {
1818 ApplyTUIntraPrediction(TU, *frame_descr.m_frame);
1819 }
1820 }
1821 }
1822 }
1823
1824 #endif // MFX_VERSION
1825