1 /* -----------------------------------------------------------------------------
2 The copyright in this software is being made available under the BSD
3 License, included below. No patent rights, trademark rights and/or
4 other Intellectual Property Rights other than the copyrights concerning
5 the Software are granted under this license.
6
7 For any license concerning other Intellectual Property rights than the software,
8 especially patent licenses, a separate Agreement needs to be closed.
9 For more information please contact:
10
11 Fraunhofer Heinrich Hertz Institute
12 Einsteinufer 37
13 10587 Berlin, Germany
14 www.hhi.fraunhofer.de/vvc
15 vvc@hhi.fraunhofer.de
16
17 Copyright (c) 2018-2021, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V.
18 All rights reserved.
19
20 Redistribution and use in source and binary forms, with or without
21 modification, are permitted provided that the following conditions are met:
22
23 * Redistributions of source code must retain the above copyright notice,
24 this list of conditions and the following disclaimer.
25 * Redistributions in binary form must reproduce the above copyright notice,
26 this list of conditions and the following disclaimer in the documentation
27 and/or other materials provided with the distribution.
28 * Neither the name of Fraunhofer nor the names of its contributors may
29 be used to endorse or promote products derived from this software without
30 specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
36 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
42 THE POSSIBILITY OF SUCH DAMAGE.
43
44
45 ------------------------------------------------------------------------------------------- */
46
47 /** \file DecLibRecon.cpp
48 \brief decoder class
49 */
50
51 #include "DecLib.h"
52
53 #include "CommonLib/TrQuant.h"
54 #if ENABLE_SIMD_TCOEFF_OPS
55 #include "CommonLib/TrQuant_EMT.h"
56 #endif
57 #include "CommonLib/InterPrediction.h"
58 #include "CommonLib/IntraPrediction.h"
59 #include "CommonLib/Unit.h"
60 #include "CommonLib/Buffer.h"
61 #include "CommonLib/UnitTools.h"
62
63 #include "CommonLib/dtrace_next.h"
64 #include "CommonLib/dtrace_buffer.h"
65
66 namespace vvdec
67 {
68
// Optional Intel ITT instrumentation.
// When TRACE_ENABLE_ITT is defined, the ITT domains, string handles and the frame counter
// declared below (defined in another translation unit) are made visible here and the
// ITT_TASKSTART / ITT_TASKEND macros expand to __itt_task_begin / __itt_task_end calls.
// Otherwise both macros expand to nothing, so instrumented code compiles unchanged.
#ifdef TRACE_ENABLE_ITT
extern __itt_domain* itt_domain_dec;
extern std::vector<__itt_domain*> itt_domain_decInst;   // indexed by decoder instance id (see DecLibRecon::create)

// one string handle per kind of task that gets marked in this file
extern __itt_string_handle* itt_handle_alf;
extern __itt_string_handle* itt_handle_presao;
extern __itt_string_handle* itt_handle_sao;
extern __itt_string_handle* itt_handle_lfl;
extern __itt_string_handle* itt_handle_intra;
extern __itt_string_handle* itt_handle_inter;
extern __itt_string_handle* itt_handle_mider;
extern __itt_string_handle* itt_handle_lfcl;
extern __itt_string_handle* itt_handle_ext;
extern __itt_string_handle* itt_handle_dmvr;
extern __itt_string_handle* itt_handle_rsp;

extern __itt_string_handle* itt_handle_schedTasks;
extern __itt_string_handle* itt_handle_waitTasks;

// global domain for DecLib (only declared here)
extern __itt_domain* itt_domain_glb;
// global frame counter (only declared here)
extern __itt_counter itt_frame_counter;

// d: ITT domain, t: string handle naming the task
#define ITT_TASKSTART( d, t ) __itt_task_begin( ( d ), __itt_null, __itt_null, ( t ) )
// t is accepted for symmetry with ITT_TASKSTART but is not used
#define ITT_TASKEND( d, t ) __itt_task_end ( ( d ) )
#else
#define ITT_TASKSTART( d, t )
#define ITT_TASKEND( d, t )
#endif
99
100 //! \ingroup DecoderLib
101 //! \{
102
reset(CodingStructure & cs,TaskType ctuStartState,int tasksPerLine,bool doALF)103 void CommonTaskParam::reset( CodingStructure& cs, TaskType ctuStartState, int tasksPerLine, bool doALF )
104 {
105 this->cs = &cs;
106
107 const int heightInCtus = cs.pcv->heightInCtus;
108 CHECKD( !ctuStates.empty() && std::any_of( ctuStates.begin(), ctuStates.end(), []( CtuState& s ) { return s != DONE; } ), "some CTUs of previous pic not done" );
109 ctuStates = std::vector<CtuState>( heightInCtus * tasksPerLine );
110 for( auto& ctu: ctuStates )
111 {
112 ctu.store( ctuStartState );
113 }
114
115
116 this->perLineMiHist = std::vector<MotionHist>( heightInCtus );
117 this->dmvrTriggers = std::vector<Barrier> ( heightInCtus );
118
119 this->doALF = doALF;
120 this->alfPrepared.lock();
121 }
122
DecLibRecon()123 DecLibRecon::DecLibRecon()
124 {
125 #if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
126 g_pelBufOP.initPelBufOpsX86();
127 #endif
128 #if ENABLE_SIMD_TCOEFF_OPS && defined( TARGET_SIMD_X86 )
129 g_tCoeffOps.initTCoeffOpsX86();
130 #endif
131
132 }
133
create(ThreadPool * threadPool,unsigned instanceId)134 void DecLibRecon::create( ThreadPool* threadPool, unsigned instanceId )
135 {
136 // run constructor again to ensure all variables, especially in DecLibParser have been reset
137 this->~DecLibRecon();
138 new( this ) DecLibRecon;
139
140
141 #if TRACE_ENABLE_ITT
142 if( itt_domain_decInst.size() < instanceId + 1 )
143 {
144 std::string name( "DecLibRecon " + std::to_string( instanceId ) );
145 itt_domain_decInst.push_back( __itt_domain_create( name.c_str() ) );
146 itt_domain_decInst.back()->flags = 1;
147
148 CHECK( itt_domain_decInst.back() != itt_domain_decInst[instanceId], "current decLibRecon ITT-Domain is not the last in vector. Instances created in the wrong order?" );
149 }
150 m_itt_decInst = itt_domain_decInst[instanceId];
151 #endif
152
153 m_decodeThreadPool = threadPool;
154 m_numDecThreads = std::max( 1, threadPool ? threadPool->numThreads() : 1 );
155
156 m_cIntraPred = new IntraPrediction[m_numDecThreads];
157 m_cInterPred = new InterPrediction[m_numDecThreads];
158 m_cTrQuant = new TrQuant [m_numDecThreads];
159 m_cCuDecoder = new DecCu [m_numDecThreads];
160 m_cReshaper = new Reshape [m_numDecThreads];
161 }
162
destroy()163 void DecLibRecon::destroy()
164 {
165 m_decodeThreadPool = nullptr;
166
167 delete[] m_cIntraPred; m_cIntraPred = nullptr;
168 delete[] m_cInterPred; m_cInterPred = nullptr;
169 delete[] m_cTrQuant; m_cTrQuant = nullptr;
170 delete[] m_cCuDecoder; m_cCuDecoder = nullptr;
171 delete[] m_cReshaper; m_cReshaper = nullptr;
172 }
173
borderExtPic(Picture * pic)174 void DecLibRecon::borderExtPic( Picture* pic )
175 {
176 pic->borderExtStarted = true;
177
178 const bool wrapAround = pic->cs->sps->getUseWrapAround();
179 if( wrapAround )
180 {
181 // copy reconstruction buffer to wrapAround buffer. All other border-extension tasks depend on this task.
182 static auto copyTask = []( int, Picture* picture ) {
183 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
184 picture->getRecoBuf( true ).copyFrom( picture->getRecoBuf() );
185 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
186 return true;
187 };
188 pic->m_copyWrapBufDone.lock();
189 m_decodeThreadPool->addBarrierTask<Picture>( copyTask,
190 pic,
191 nullptr,
192 &pic->m_copyWrapBufDone,
193 { &pic->done} );
194 }
195
196 // start actual border extension tasks
197 {
198 static auto task = []( int, Picture* picture ) {
199 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
200 picture->extendPicBorder( true, false, false, false );
201 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
202 return true;
203 };
204 m_decodeThreadPool->addBarrierTask<Picture>( task,
205 pic,
206 &pic->m_borderExtTaskCounter,
207 nullptr,
208 { wrapAround ? &pic->m_copyWrapBufDone : &pic->done} );
209 }
210
211 {
212 static auto task = []( int, Picture* picture ) {
213 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
214 picture->extendPicBorder( false, true, false, false );
215 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
216 return true;
217 };
218 m_decodeThreadPool->addBarrierTask<Picture>( task,
219 pic,
220 &pic->m_borderExtTaskCounter,
221 nullptr,
222 { wrapAround ? &pic->m_copyWrapBufDone : &pic->done} );
223 }
224
225 {
226 static auto task = []( int, Picture* picture ) {
227 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
228 picture->extendPicBorder( false, false, true, false, CH_L );
229 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
230 return true;
231 };
232 m_decodeThreadPool->addBarrierTask<Picture>( task,
233 pic,
234 &pic->m_borderExtTaskCounter,
235 nullptr,
236 { wrapAround ? &pic->m_copyWrapBufDone : &pic->done} );
237 }
238 {
239 static auto task = []( int, Picture* picture ) {
240 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
241 picture->extendPicBorder( false, false, false, true, CH_L );
242 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
243 return true;
244 };
245 m_decodeThreadPool->addBarrierTask<Picture>( task,
246 pic,
247 &pic->m_borderExtTaskCounter,
248 nullptr,
249 { wrapAround ? &pic->m_copyWrapBufDone : &pic->done} );
250 }
251
252 {
253 static auto task = []( int, Picture* picture ) {
254 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
255 picture->extendPicBorder( false, false, true, false, CH_C );
256 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
257 return true;
258 };
259 m_decodeThreadPool->addBarrierTask<Picture>( task,
260 pic,
261 &pic->m_borderExtTaskCounter,
262 nullptr,
263 { wrapAround ? &pic->m_copyWrapBufDone : &pic->done} );
264 }
265 {
266 static auto task = []( int, Picture* picture ) {
267 ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
268 picture->extendPicBorder( false, false, false, true, CH_C );
269 ITT_TASKEND( itt_domain_dec, itt_handle_ext );
270 return true;
271 };
272 m_decodeThreadPool->addBarrierTask<Picture>( task,
273 pic,
274 &pic->m_borderExtTaskCounter,
275 nullptr,
276 { wrapAround ? &pic->m_copyWrapBufDone : &pic->done} );
277 }
278 }
279
createSubPicRefBufs(Picture * pic)280 void DecLibRecon::createSubPicRefBufs( Picture* pic )
281 {
282 pic->subPicExtStarted = true;
283
284 const PPS* pps = pic->cs->pps.get();
285 const SPS* sps = pic->cs->sps.get();
286 const int numSubPic = pps->getNumSubPics();
287
288 pic->m_subPicRefBufs.resize( numSubPic );
289 for( int i = 0; i < numSubPic; ++i )
290 {
291 const SubPic& currSubPic = pps->getSubPic( i );
292 const Area subPicArea( currSubPic.getSubPicLeft(),
293 currSubPic.getSubPicTop(),
294 currSubPic.getSubPicWidthInLumaSample(),
295 currSubPic.getSubPicHeightInLumaSample() );
296
297 pic->m_subPicRefBufs[i].create( pic->chromaFormat, Size( subPicArea ), sps->getMaxCUWidth(), pic->margin, MEMORY_ALIGN_DEF_SIZE );
298
299 static auto task = []( int, SubPicExtTask* t ) {
300 t->subPicBuf->copyFrom( t->picture->getRecoBuf().subBuf( t->subPicArea ) );
301 t->picture->extendPicBorderBuf( *t->subPicBuf );
302 return true;
303 };
304 m_subPicExtTasks.emplace_back( SubPicExtTask{ pic, &pic->m_subPicRefBufs[i], subPicArea } );
305 m_decodeThreadPool->addBarrierTask<SubPicExtTask>( task, &m_subPicExtTasks.back(), &pic->m_borderExtTaskCounter, nullptr, { &pic->done } );
306 }
307 }
308
decompressPicture(Picture * pcPic)309 void DecLibRecon::decompressPicture( Picture* pcPic )
310 {
311 CodingStructure& cs = *pcPic->cs;
312
313 pcPic->inProgress = true;
314
315 #ifdef TRACE_ENABLE_ITT
316 // mark start of frame
317 pcPic->m_itt_decLibInst = m_itt_decInst;
318 __itt_frame_begin_v3( pcPic->m_itt_decLibInst, nullptr );
319 #endif
320
321 // Initialise the various objects for the new set of settings
322 const SPS * sps = cs.sps.get();
323 const PPS * pps = cs.pps.get();
324
325 for( int i = 0; i < m_numDecThreads; i++ )
326 {
327 if( sps->getUseReshaper() )
328 {
329 m_cReshaper[i].createDec( sps->getBitDepth( CHANNEL_TYPE_LUMA ) );
330 m_cReshaper[i].initSlice( pcPic->slices[0]->getNalUnitLayerId(), *pcPic->slices[0]->getPicHeader(), *pcPic->slices[0]->getVPS() );
331 }
332
333 m_cIntraPred[i].init( sps->getChromaFormatIdc(), sps->getBitDepth( CHANNEL_TYPE_LUMA ) );
334 m_cInterPred[i].init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() );
335
336 // Recursive structure
337 m_cTrQuant[i] .init( pcPic );
338 m_cCuDecoder[i].init( &m_cIntraPred[i], &m_cInterPred[i], &m_cReshaper[i], &m_cTrQuant[i] );
339 }
340
341 const uint32_t log2SaoOffsetScaleLuma = (uint32_t) std::max(0, sps->getBitDepth(CHANNEL_TYPE_LUMA ) - MAX_SAO_TRUNCATED_BITDEPTH);
342 const uint32_t log2SaoOffsetScaleChroma = (uint32_t) std::max(0, sps->getBitDepth(CHANNEL_TYPE_CHROMA) - MAX_SAO_TRUNCATED_BITDEPTH);
343 const int maxDepth = getLog2(sps->getMaxCUWidth()) - pps->pcv->minCUWidthLog2;
344 m_cSAO.create( pps->getPicWidthInLumaSamples(),
345 pps->getPicHeightInLumaSamples(),
346 sps->getChromaFormatIdc(),
347 sps->getMaxCUWidth(),
348 sps->getMaxCUHeight(),
349 maxDepth,
350 log2SaoOffsetScaleLuma,
351 log2SaoOffsetScaleChroma
352 );
353
354 if( sps->getUseALF() )
355 {
356 m_cALF.create( cs.picHeader, sps, pps, m_numDecThreads );
357 }
358
359 const int widthInCtus = cs.pcv->widthInCtus;
360 const int heightInCtus = cs.pcv->heightInCtus;
361
362 if( sps->getIBCFlag() )
363 {
364 cs.initVIbcBuf( heightInCtus, sps->getChromaFormatIdc(), sps->getMaxCUHeight() );
365 }
366 pcPic->startProcessingTimer();
367
368 if( m_decodeThreadPool->numThreads() > 0 )
369 {
370 ITT_TASKSTART( itt_domain_dec, itt_handle_schedTasks );
371 }
372
373 picBarriers.clear();
374 #if ALLOW_MIDER_LF_DURING_PICEXT
375 CBarrierVec picExtBarriers;
376 #else
377 CBarrierVec &picExtBarriers = picBarriers;
378 #endif
379
380 const int numSubPic = cs.pps->getNumSubPics();
381 if( numSubPic > 1 )
382 {
383 m_subPicExtTasks.clear();
384 m_subPicExtTasks.reserve( pcPic->slices.size() * MAX_NUM_REF_PICS * numSubPic );
385 }
386
387 std::vector<Picture*> borderExtRefPics;
388 for( const Slice* slice : pcPic->slices )
389 {
390 if( slice->isIntra() )
391 {
392 continue;
393 }
394
395 for( int iDir = REF_PIC_LIST_0; iDir < NUM_REF_PIC_LIST_01; ++iDir )
396 {
397 for( int iRefIdx = 0; iRefIdx < slice->getNumRefIdx( ( RefPicList ) iDir ); iRefIdx++ )
398 {
399 Picture* refPic = slice->getNoConstRefPic( ( RefPicList ) iDir, iRefIdx );
400
401 if( std::find( borderExtRefPics.cbegin(), borderExtRefPics.cend(), refPic ) == borderExtRefPics.cend() )
402 {
403 borderExtRefPics.push_back( refPic );
404 }
405 }
406 }
407 }
408
409 for( Picture* refPic : borderExtRefPics )
410 {
411 if( !refPic->borderExtStarted )
412 {
413 // TODO: (GH) Can we bypass this border extension, when all subpics (>1) are treated as pics?
414 borderExtPic( refPic );
415 }
416
417 if( !refPic->subPicExtStarted && numSubPic > 1 && refPic->m_subPicRefBufs.size() != numSubPic )
418 {
419 CHECK( !refPic->m_subPicRefBufs.empty(), "Wrong number of subpics already present in reference picture" );
420 CHECK( cs.sps->getUseWrapAround(), "Wraparound + subpics not implemented" );
421
422 createSubPicRefBufs( refPic );
423 }
424
425 if( refPic->m_borderExtTaskCounter.isBlocked() &&
426 std::find( picExtBarriers.cbegin(), picExtBarriers.cend(), refPic->m_borderExtTaskCounter.donePtr() ) == picExtBarriers.cend() )
427 {
428 picExtBarriers.push_back( refPic->m_borderExtTaskCounter.donePtr() );
429 }
430
431 if( refPic->m_dmvrTaskCounter.isBlocked() &&
432 std::find( picBarriers.cbegin(), picBarriers.cend(), refPic->m_dmvrTaskCounter.donePtr() ) == picBarriers.cend() )
433 {
434 picBarriers.push_back( refPic->m_dmvrTaskCounter.donePtr() );
435 }
436 }
437
438 if( m_decodeThreadPool->numThreads() == 0 && (
439 std::any_of( picExtBarriers.cbegin(), picExtBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ||
440 std::any_of( picBarriers .cbegin(), picBarriers .cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ) )
441 {
442 m_decodeThreadPool->processTasksOnMainThread();
443 }
444
445 const bool isIntra = std::all_of( pcPic->slices.begin(), pcPic->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } );
446
447 const int numColPerTask = std::max( std::min( widthInCtus, ( widthInCtus / std::max( m_numDecThreads * ( isIntra ? 2 : 1 ), 1 ) ) + ( isIntra ? 0 : 1 ) ), 1 );
448 const int numTasksPerLine = widthInCtus / numColPerTask + !!( widthInCtus % numColPerTask );
449
450 #if ALLOW_MIDER_LF_DURING_PICEXT
451 pcPic->refPicExtDepBarriers = std::move( picExtBarriers );
452 #endif
453 #if !RECO_WHILE_PARSE
454 picBarriers.push_back( &cs.slice->parseDone );
455
456 #endif
457 const TaskType ctuStartState = MIDER;
458 const bool doALF = cs.sps->getUseALF() && !AdaptiveLoopFilter::getAlfSkipPic( cs );
459 commonTaskParam.reset( cs, ctuStartState, numTasksPerLine, doALF );
460
461 tasksDMVR = std::vector<LineTaskParam>( heightInCtus, LineTaskParam{ commonTaskParam, -1 } );
462 tasksCtu = std::vector<CtuTaskParam >( heightInCtus * numTasksPerLine, CtuTaskParam{ commonTaskParam, -1, -1, {} } );
463
464 pcPic->done.lock();
465
466 #if 0
467 // schedule in raster scan order
468 for( int line = 0; line < heightInCtus; ++line )
469 {
470 for( int col = 0; col < widthInCtus; ++col )
471 {
472 #else
473 // schedule in zig-zag scan order
474 for( int i = 0; i < numTasksPerLine + heightInCtus; ++i )
475 {
476 int line = 0;
477 for( int col = i; col >= 0; --col, ++line )
478 {
479 #endif
480 if( line < heightInCtus && col < numTasksPerLine )
481 {
482 CBarrierVec ctuBarriesrs = picBarriers;
483
484 #if RECO_WHILE_PARSE
485 const int ctuStart = col * numColPerTask;
486 const int ctuEnd = std::min( ctuStart + numColPerTask, widthInCtus );
487 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
488 {
489 ctuBarriesrs.push_back( &pcPic->ctuParsedBarrier[line * widthInCtus + ctu] );
490 }
491 #endif
492 CtuTaskParam* param = &tasksCtu[line * numTasksPerLine + col];
493 param->line = line;
494 param->col = col;
495 param->numColPerTask = numColPerTask;
496 param->numTasksPerLine = numTasksPerLine;
497
498 m_decodeThreadPool->addBarrierTask<CtuTaskParam>( ctuTask<false>,
499 param,
500 &pcPic->m_ctuTaskCounter,
501 nullptr,
502 std::move( ctuBarriesrs ),
503 ctuTask<true> );
504 }
505 }
506 }
507
508 if( commonTaskParam.doALF )
509 {
510 AdaptiveLoopFilter::preparePic( cs );
511 commonTaskParam.alfPrepared.unlock();
512 }
513
514 {
515 static auto doneTask = []( int, Picture* picture )
516 {
517 CodingStructure& cs = *picture->cs;
518 if( cs.sps->getUseALF() && !AdaptiveLoopFilter::getAlfSkipPic( cs ) )
519 {
520 AdaptiveLoopFilter::swapBufs( cs );
521 }
522
523 picture->reconstructed = true;
524 #ifdef TRACE_ENABLE_ITT
525 // mark end of frame
526 __itt_frame_end_v3( picture->m_itt_decLibInst, nullptr );
527 #endif
528 picture->stopProcessingTimer();
529
530 return true;
531 };
532 m_decodeThreadPool->addBarrierTask<Picture>( doneTask, pcPic, nullptr, &pcPic->done, { pcPic->m_ctuTaskCounter.donePtr() } );
533 }
534
535 if( pcPic->referenced )
536 {
537 static auto task = []( int tid, LineTaskParam* param )
538 {
539 ITT_TASKSTART( itt_domain_dec, itt_handle_dmvr );
540 auto& cs = *param->common.cs;
541 for( int col = 0; col < cs.pcv->widthInCtus; col++ )
542 {
543 param->common.decLib.m_cCuDecoder[tid].TaskDeriveDMVRMotionInfo( cs, getCtuArea( cs, col, param->line, true ) );
544 }
545 ITT_TASKEND( itt_domain_dec, itt_handle_dmvr );
546 return true;
547 };
548
549 for( int taskLineDMVR = 0; taskLineDMVR < heightInCtus; taskLineDMVR++ )
550 {
551 auto param = &tasksDMVR[taskLineDMVR];
552 param->line = taskLineDMVR;
553 m_decodeThreadPool->addBarrierTask<LineTaskParam>( task,
554 param,
555 &pcPic->m_dmvrTaskCounter,
556 nullptr,
557 { &commonTaskParam.dmvrTriggers[taskLineDMVR], &pcPic->parseDone } );
558 }
559
560 {
561 // dummy task to propagate exceptions from the ctu-decoding tasks to the dmvrTaskCounter
562 static auto dummyTask = []( int, void* ) { return true; };
563 m_decodeThreadPool->addBarrierTask<void>( dummyTask, nullptr, &pcPic->m_dmvrTaskCounter, nullptr, { pcPic->m_ctuTaskCounter.donePtr() } );
564 }
565 }
566
567 if( m_decodeThreadPool->numThreads() == 0 )
568 {
569 }
570 else
571 {
572 ITT_TASKEND( itt_domain_dec, itt_handle_schedTasks );
573 }
574
575 m_currDecompPic = pcPic;
576 }
577
578 Picture* DecLibRecon::waitForPrevDecompressedPic()
579 {
580 if( !m_currDecompPic )
581 return nullptr;
582
583 ITT_TASKSTART( itt_domain_dec, itt_handle_waitTasks );
584 if( m_decodeThreadPool->numThreads() == 0 )
585 {
586 m_decodeThreadPool->processTasksOnMainThread();
587 CHECK( m_currDecompPic->m_dmvrTaskCounter.isBlocked() || m_currDecompPic->done.isBlocked(), "can't make progress. some dependecy has not been finished" );
588 }
589 m_currDecompPic->m_dmvrTaskCounter.wait();
590 m_currDecompPic->done.wait();
591 ITT_TASKEND( itt_domain_dec, itt_handle_waitTasks );
592
593 m_currDecompPic->inProgress = false;
594 return std::exchange( m_currDecompPic, nullptr );
595 }
596
597 template<bool onlyCheckReadyState>
598 bool DecLibRecon::ctuTask( int tid, CtuTaskParam* param )
599 {
600 const int col = param->col;
601 const int line = param->line;
602
603 auto& cs = *param->common.cs;
604 auto& decLib = param->common.decLib;
605 const int widthInCtus = param->numTasksPerLine;
606 const int heightInCtus = cs.pcv->heightInCtus;
607
608 CtuState& thisCtuState = param->common.ctuStates[line * widthInCtus + col];
609 const CtuState* thisLine = ¶m->common.ctuStates[line * widthInCtus];
610 const CtuState* lineAbove = thisLine - widthInCtus;
611 const CtuState* lineBelow = thisLine + widthInCtus;
612
613 const int ctuStart = col * param->numColPerTask;
614 const int ctuEnd = std::min<int>( ctuStart + param->numColPerTask, cs.pcv->widthInCtus );
615
616 try
617 {
618 if( cs.picture->m_ctuTaskCounter.hasException() )
619 {
620 std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() );
621 }
622
623 switch( thisCtuState.load() )
624 {
625 // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions
626
627 case MIDER:
628 {
629 if( col > 0 && thisLine[col - 1] <= MIDER )
630 return false;
631 if( line > 0 && lineAbove[std::min( col + 1, widthInCtus - 1 )] <= MIDER )
632 return false;
633 if( onlyCheckReadyState )
634 return true;
635
636 ITT_TASKSTART( itt_domain_dec, itt_handle_mider );
637
638 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
639 {
640 CtuData& ctuData = cs.getCtuData( ctu, line );
641 GCC_WARNING_DISABLE_class_memaccess
642 memset( ctuData.motion, 0, sizeof( CtuData::motion ) );
643 GCC_WARNING_RESET
644
645 if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() )
646 {
647 const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
648 decLib.m_cCuDecoder[tid].TaskDeriveCtuMotionInfo( cs, ctuArea, param->common.perLineMiHist[line] );
649 }
650 }
651 thisCtuState = ( TaskType )( MIDER + 1 );
652
653 ITT_TASKEND( itt_domain_dec, itt_handle_mider );
654 }
655
656 case LF_INIT:
657 {
658 if( onlyCheckReadyState )
659 return true;
660
661 ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl );
662
663 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
664 {
665 CtuData& ctuData = cs.getCtuData( ctu, line );
666 memset( ctuData.lfParam, 0, sizeof( CtuData::lfParam ) );
667
668 const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
669 decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuArea );
670 }
671
672 thisCtuState = ( TaskType )( LF_INIT + 1 );
673
674 ITT_TASKEND( itt_domain_dec, itt_handle_lfcl );
675 }
676
677 case INTER:
678 {
679 if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )
680 {
681 // not really necessary, but only for optimizing the wave-fronts
682 if( col > 1 && thisLine[col - 2] <= INTER )
683 return false;
684 if( line > 0 && lineAbove[col] <= INTER )
685 return false;
686 }
687
688 if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) )
689 {
690 return false;
691 }
692
693 if( onlyCheckReadyState )
694 return true;
695
696 ITT_TASKSTART( itt_domain_dec, itt_handle_inter );
697
698 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
699 {
700 const CtuData& ctuData = cs.getCtuData( ctu, line );
701 const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
702
703 decLib.m_cCuDecoder[tid].TaskTrafoCtu( cs, ctuArea );
704
705 if( !ctuData.slice->isIntra() )
706 {
707 decLib.m_cCuDecoder[tid].TaskInterCtu( cs, ctuArea );
708 }
709 }
710
711 thisCtuState = ( TaskType )( INTER + 1 );
712
713 ITT_TASKEND( itt_domain_dec, itt_handle_inter );
714 }
715
716 case INTRA:
717 {
718 if( col > 0 && thisLine[col - 1] <= INTRA )
719 return false;
720 if( line > 0 && lineAbove[std::min( col + 1, widthInCtus - 1 )] <= INTRA )
721 return false;
722 if( onlyCheckReadyState )
723 return true;
724
725 ITT_TASKSTART( itt_domain_dec, itt_handle_intra );
726
727 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
728 {
729 const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
730 decLib.m_cCuDecoder[tid].TaskCriticalIntraKernel( cs, ctuArea );
731 }
732
733 thisCtuState = ( TaskType )( INTRA + 1 );
734
735 ITT_TASKEND( itt_domain_dec, itt_handle_intra );
736 }
737
738 case RSP:
739 {
740 // RIRZIIIII
741 // IIIIIXXXX
742 //
743 // - Z can be reshaped when it is no more an intra prediction source for X in the next line
744
745
746 if ( line + 1 < heightInCtus && col + 1 < widthInCtus && lineBelow[col + 1] < RSP )
747 return false;
748 else if( line + 1 < heightInCtus && lineBelow[col] < RSP )
749 return false;
750 else if( col + 1 < widthInCtus && thisLine [col + 1] < RSP ) // need this for the last line
751 return false;
752
753 if( onlyCheckReadyState )
754 return true;
755
756 ITT_TASKSTART( itt_domain_dec, itt_handle_rsp );
757
758 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
759 {
760 decLib.m_cReshaper[tid].rspCtu( cs, ctu, line, 0 );
761 }
762
763 ITT_TASKEND( itt_domain_dec, itt_handle_rsp );
764
765 thisCtuState = ( TaskType )( RSP + 1 );
766 }
767
768 case LF_V:
769 {
770 if( col > 0 && thisLine[col - 1] < LF_V )
771 return false;
772 if( onlyCheckReadyState )
773 return true;
774
775 ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
776
777 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
778 {
779 decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, 0, EDGE_VER );
780 }
781
782 thisCtuState = ( TaskType )( LF_V + 1 );
783
784 ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
785 }
786
787 case LF_H:
788 {
789 if( line > 0 && lineAbove[col] < LF_H )
790 return false;
791
792 if( line > 0 && col + 1 < widthInCtus && lineAbove[col + 1] < LF_H )
793 return false;
794
795 if( col + 1 < widthInCtus && thisLine[col + 1] < LF_H )
796 return false;
797
798 if( onlyCheckReadyState )
799 return true;
800
801 ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
802
803 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
804 {
805 decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, 0, EDGE_HOR );
806 }
807
808 thisCtuState = ( TaskType )( LF_H + 1 );
809
810 ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
811 }
812
813 case PRESAO:
814 {
815 // only last CTU processes full line
816 if( col == widthInCtus - 1 )
817 {
818 if( line > 0 && lineAbove[col] <= PRESAO )
819 return false;
820
821 for( int c = 0; c < widthInCtus; ++c )
822 {
823 if( thisLine[c] < PRESAO )
824 return false;
825
826 if( line + 1 < heightInCtus && lineBelow[c] < PRESAO )
827 return false;
828 }
829 if( onlyCheckReadyState )
830 return true;
831
832 ITT_TASKSTART( itt_domain_dec, itt_handle_presao );
833
834 if( cs.sps->getUseSAO() )
835 {
836 decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) );
837 }
838 param->common.dmvrTriggers[line].unlock();
839
840 ITT_TASKEND( itt_domain_dec, itt_handle_presao );
841 }
842 else if( thisLine[widthInCtus - 1] <= PRESAO ) // wait for last CTU to finish PRESAO
843 {
844 return false;
845 }
846 if( onlyCheckReadyState )
847 return true;
848
849 thisCtuState = ( TaskType )( PRESAO + 1 );
850 }
851
852 case SAO:
853 {
854 if( onlyCheckReadyState )
855 return true;
856
857 // only last CTU processes full line
858 if( cs.sps->getUseSAO() )
859 {
860 ITT_TASKSTART( itt_domain_dec, itt_handle_sao );
861
862 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
863 {
864 const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
865 decLib.m_cSAO.SAOProcessCTU( cs, ctuArea );
866 }
867
868 ITT_TASKEND( itt_domain_dec, itt_handle_sao );
869 }
870 if( param->common.doALF )
871 {
872 ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
873
874 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
875 {
876 AdaptiveLoopFilter::prepareCTU( cs, ctu, line );
877 }
878
879 ITT_TASKEND( itt_domain_dec, itt_handle_alf );
880 }
881
882 thisCtuState = ( TaskType )( SAO + 1 );
883 }
884
885 case ALF:
886 {
887 if( param->common.doALF )
888 {
889 const bool a = line > 0;
890 const bool b = line + 1 < heightInCtus;
891 const bool c = col > 0;
892 const bool d = col + 1 < widthInCtus;
893
894 if( param->common.alfPrepared.isBlocked() )
895 return false;
896
897 if( a )
898 {
899 if( c && lineAbove[col - 1] < ALF ) return false;
900 if( lineAbove[col ] < ALF ) return false;
901 if( d && lineAbove[col + 1] < ALF ) return false;
902 }
903
904 if( b )
905 {
906 if( c && lineBelow[col - 1] < ALF ) return false;
907 if( lineBelow[col ] < ALF ) return false;
908 if( d && lineBelow[col + 1] < ALF ) return false;
909 }
910
911 if( c && thisLine[col - 1] < ALF ) return false;
912 if( d && thisLine[col + 1] < ALF ) return false;
913
914 if( onlyCheckReadyState )
915 return true;
916
917 ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
918 for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
919 {
920 decLib.m_cALF.processCTU( cs, ctu, line, tid );
921 }
922 ITT_TASKEND( itt_domain_dec, itt_handle_alf );
923 }
924 else if( onlyCheckReadyState )
925 return true;
926
927 thisCtuState = ( TaskType )( ALF + 1 );
928 }
929
930 default:
931 CHECKD( thisCtuState != DONE, "Wrong CTU state" );
932 } // end switch
933 }
934 catch( ... )
935 {
936 for( auto& t: param->common.dmvrTriggers )
937 {
938 t.setException( std::current_exception() );
939 }
940 std::rethrow_exception( std::current_exception() );
941 }
942
943 return true;
944 }
945
946 }
947