1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2014, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  *  * Redistributions of source code must retain the above copyright notice,
13  *    this list of conditions and the following disclaimer.
14  *  * Redistributions in binary form must reproduce the above copyright notice,
15  *    this list of conditions and the following disclaimer in the documentation
16  *    and/or other materials provided with the distribution.
17  *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  *    be used to endorse or promote products derived from this software without
19  *    specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /** \file     TEncCu.cpp
35     \brief    Coding Unit (CU) encoder class
36 */
37 
38 #include <stdio.h>
39 #include "TEncTop.h"
40 #include "TEncCu.h"
41 #include "TEncAnalyze.h"
42 #include "TLibCommon/Debug.h"
43 
44 #include <cmath>
45 #include <algorithm>
46 using namespace std;
47 
48 
49 //! \ingroup TLibEncoder
50 //! \{
51 
52 // ====================================================================================================================
53 // Constructor / destructor / create / destroy
54 // ====================================================================================================================
55 
56 /**
57  \param    uiTotalDepth  total number of allowable depth
58  \param    uiMaxWidth    largest CU width
59  \param    uiMaxHeight   largest CU height
60  */
create(UChar uhTotalDepth,UInt uiMaxWidth,UInt uiMaxHeight,ChromaFormat chromaFormat)61 Void TEncCu::create(UChar uhTotalDepth, UInt uiMaxWidth, UInt uiMaxHeight, ChromaFormat chromaFormat)
62 {
63   Int i;
64 
65   m_uhTotalDepth   = uhTotalDepth + 1;
66   m_ppcBestCU      = new TComDataCU*[m_uhTotalDepth-1];
67   m_ppcTempCU      = new TComDataCU*[m_uhTotalDepth-1];
68 
69   m_ppcPredYuvBest = new TComYuv*[m_uhTotalDepth-1];
70   m_ppcResiYuvBest = new TComYuv*[m_uhTotalDepth-1];
71   m_ppcRecoYuvBest = new TComYuv*[m_uhTotalDepth-1];
72   m_ppcPredYuvTemp = new TComYuv*[m_uhTotalDepth-1];
73   m_ppcResiYuvTemp = new TComYuv*[m_uhTotalDepth-1];
74   m_ppcRecoYuvTemp = new TComYuv*[m_uhTotalDepth-1];
75   m_ppcOrigYuv     = new TComYuv*[m_uhTotalDepth-1];
76 
77   UInt uiNumPartitions;
78   for( i=0 ; i<m_uhTotalDepth-1 ; i++)
79   {
80     uiNumPartitions = 1<<( ( m_uhTotalDepth - i - 1 )<<1 );
81     UInt uiWidth  = uiMaxWidth  >> i;
82     UInt uiHeight = uiMaxHeight >> i;
83 
84     m_ppcBestCU[i] = new TComDataCU; m_ppcBestCU[i]->create( chromaFormat, uiNumPartitions, uiWidth, uiHeight, false, uiMaxWidth >> (m_uhTotalDepth - 1) );
85     m_ppcTempCU[i] = new TComDataCU; m_ppcTempCU[i]->create( chromaFormat, uiNumPartitions, uiWidth, uiHeight, false, uiMaxWidth >> (m_uhTotalDepth - 1) );
86 
87     m_ppcPredYuvBest[i] = new TComYuv; m_ppcPredYuvBest[i]->create(uiWidth, uiHeight, chromaFormat);
88     m_ppcResiYuvBest[i] = new TComYuv; m_ppcResiYuvBest[i]->create(uiWidth, uiHeight, chromaFormat);
89     m_ppcRecoYuvBest[i] = new TComYuv; m_ppcRecoYuvBest[i]->create(uiWidth, uiHeight, chromaFormat);
90 
91     m_ppcPredYuvTemp[i] = new TComYuv; m_ppcPredYuvTemp[i]->create(uiWidth, uiHeight, chromaFormat);
92     m_ppcResiYuvTemp[i] = new TComYuv; m_ppcResiYuvTemp[i]->create(uiWidth, uiHeight, chromaFormat);
93     m_ppcRecoYuvTemp[i] = new TComYuv; m_ppcRecoYuvTemp[i]->create(uiWidth, uiHeight, chromaFormat);
94 
95     m_ppcOrigYuv    [i] = new TComYuv; m_ppcOrigYuv    [i]->create(uiWidth, uiHeight, chromaFormat);
96   }
97 
98   m_bEncodeDQP          = false;
99   m_CodeChromaQpAdjFlag = false;
100   m_ChromaQpAdjIdc      = 0;
101 
102   // initialize partition order.
103   UInt* piTmp = &g_auiZscanToRaster[0];
104   initZscanToRaster( m_uhTotalDepth, 1, 0, piTmp);
105   initRasterToZscan( uiMaxWidth, uiMaxHeight, m_uhTotalDepth );
106 
107   // initialize conversion matrix from partition index to pel
108   initRasterToPelXY( uiMaxWidth, uiMaxHeight, m_uhTotalDepth );
109 }
110 
destroy()111 Void TEncCu::destroy()
112 {
113   Int i;
114 
115   for( i=0 ; i<m_uhTotalDepth-1 ; i++)
116   {
117     if(m_ppcBestCU[i])
118     {
119       m_ppcBestCU[i]->destroy();      delete m_ppcBestCU[i];      m_ppcBestCU[i] = NULL;
120     }
121     if(m_ppcTempCU[i])
122     {
123       m_ppcTempCU[i]->destroy();      delete m_ppcTempCU[i];      m_ppcTempCU[i] = NULL;
124     }
125     if(m_ppcPredYuvBest[i])
126     {
127       m_ppcPredYuvBest[i]->destroy(); delete m_ppcPredYuvBest[i]; m_ppcPredYuvBest[i] = NULL;
128     }
129     if(m_ppcResiYuvBest[i])
130     {
131       m_ppcResiYuvBest[i]->destroy(); delete m_ppcResiYuvBest[i]; m_ppcResiYuvBest[i] = NULL;
132     }
133     if(m_ppcRecoYuvBest[i])
134     {
135       m_ppcRecoYuvBest[i]->destroy(); delete m_ppcRecoYuvBest[i]; m_ppcRecoYuvBest[i] = NULL;
136     }
137     if(m_ppcPredYuvTemp[i])
138     {
139       m_ppcPredYuvTemp[i]->destroy(); delete m_ppcPredYuvTemp[i]; m_ppcPredYuvTemp[i] = NULL;
140     }
141     if(m_ppcResiYuvTemp[i])
142     {
143       m_ppcResiYuvTemp[i]->destroy(); delete m_ppcResiYuvTemp[i]; m_ppcResiYuvTemp[i] = NULL;
144     }
145     if(m_ppcRecoYuvTemp[i])
146     {
147       m_ppcRecoYuvTemp[i]->destroy(); delete m_ppcRecoYuvTemp[i]; m_ppcRecoYuvTemp[i] = NULL;
148     }
149     if(m_ppcOrigYuv[i])
150     {
151       m_ppcOrigYuv[i]->destroy();     delete m_ppcOrigYuv[i];     m_ppcOrigYuv[i] = NULL;
152     }
153   }
154   if(m_ppcBestCU)
155   {
156     delete [] m_ppcBestCU;
157     m_ppcBestCU = NULL;
158   }
159   if(m_ppcTempCU)
160   {
161     delete [] m_ppcTempCU;
162     m_ppcTempCU = NULL;
163   }
164 
165   if(m_ppcPredYuvBest)
166   {
167     delete [] m_ppcPredYuvBest;
168     m_ppcPredYuvBest = NULL;
169   }
170   if(m_ppcResiYuvBest)
171   {
172     delete [] m_ppcResiYuvBest;
173     m_ppcResiYuvBest = NULL;
174   }
175   if(m_ppcRecoYuvBest)
176   {
177     delete [] m_ppcRecoYuvBest;
178     m_ppcRecoYuvBest = NULL;
179   }
180   if(m_ppcPredYuvTemp)
181   {
182     delete [] m_ppcPredYuvTemp;
183     m_ppcPredYuvTemp = NULL;
184   }
185   if(m_ppcResiYuvTemp)
186   {
187     delete [] m_ppcResiYuvTemp;
188     m_ppcResiYuvTemp = NULL;
189   }
190   if(m_ppcRecoYuvTemp)
191   {
192     delete [] m_ppcRecoYuvTemp;
193     m_ppcRecoYuvTemp = NULL;
194   }
195   if(m_ppcOrigYuv)
196   {
197     delete [] m_ppcOrigYuv;
198     m_ppcOrigYuv = NULL;
199   }
200 }
201 
202 /** \param    pcEncTop      pointer of encoder class
203  */
init(TEncTop * pcEncTop)204 Void TEncCu::init( TEncTop* pcEncTop )
205 {
206   m_pcEncCfg           = pcEncTop;
207   m_pcPredSearch       = pcEncTop->getPredSearch();
208   m_pcTrQuant          = pcEncTop->getTrQuant();
209   m_pcRdCost           = pcEncTop->getRdCost();
210 
211   m_pcEntropyCoder     = pcEncTop->getEntropyCoder();
212   m_pcBinCABAC         = pcEncTop->getBinCABAC();
213 
214   m_pppcRDSbacCoder    = pcEncTop->getRDSbacCoder();
215   m_pcRDGoOnSbacCoder  = pcEncTop->getRDGoOnSbacCoder();
216 
217   m_pcRateCtrl         = pcEncTop->getRateCtrl();
218 }
219 
220 // ====================================================================================================================
221 // Public member functions
222 // ====================================================================================================================
223 
224 /** \param  rpcCU pointer of CU data class
225  */
compressCtu(TComDataCU * pCtu)226 Void TEncCu::compressCtu( TComDataCU* pCtu )
227 {
228   // initialize CU data
229   m_ppcBestCU[0]->initCtu( pCtu->getPic(), pCtu->getCtuRsAddr() );
230   m_ppcTempCU[0]->initCtu( pCtu->getPic(), pCtu->getCtuRsAddr() );
231 
232   // analysis of CU
233   DEBUG_STRING_NEW(sDebug)
234 
235   xCompressCU( m_ppcBestCU[0], m_ppcTempCU[0], 0 DEBUG_STRING_PASS_INTO(sDebug) );
236   DEBUG_STRING_OUTPUT(std::cout, sDebug)
237 
238 #if ADAPTIVE_QP_SELECTION
239   if( m_pcEncCfg->getUseAdaptQpSelect() )
240   {
241     if(pCtu->getSlice()->getSliceType()!=I_SLICE) //IIII
242     {
243       xCtuCollectARLStats( pCtu );
244     }
245   }
246 #endif
247 }
248 /** \param  pcCU  pointer of CU data class
249  */
encodeCtu(TComDataCU * pCtu)250 Void TEncCu::encodeCtu ( TComDataCU* pCtu )
251 {
252   if ( pCtu->getSlice()->getPPS()->getUseDQP() )
253   {
254     setdQPFlag(true);
255   }
256 
257   if ( pCtu->getSlice()->getUseChromaQpAdj() )
258   {
259     setCodeChromaQpAdjFlag(true);
260   }
261 
262   // Encode CU data
263   xEncodeCU( pCtu, 0, 0 );
264 }
265 
266 // ====================================================================================================================
267 // Protected member functions
268 // ====================================================================================================================
/** Derive small set of test modes for AMP encoder speed-up
 *
 * Apart from the explicit resets noted in the body, the output flags are only
 * ever raised here; a flag that is not raised keeps the value the caller
 * passed in.
 *
 *\param   pcBestCU           best CU so far; partition 0 supplies the partition size, merge flag, skip state and width
 *\param   eParentPartSize    partition size of the parent CU (NUMBER_OF_PART_SIZES when the parent is intra)
 *\param   bTestAMP_Hor       [out] test the horizontal AMP partitions
 *\param   bTestAMP_Ver       [out] test the vertical AMP partitions
 *\param   bTestMergeAMP_Hor  [out] test the horizontal AMP partitions in merge mode (AMP_MRG builds only)
 *\param   bTestMergeAMP_Ver  [out] test the vertical AMP partitions in merge mode (AMP_MRG builds only)
 *\returns Void
*/
#if AMP_ENC_SPEEDUP
#if AMP_MRG
Void TEncCu::deriveTestModeAMP (TComDataCU *pcBestCU, PartSize eParentPartSize, Bool &bTestAMP_Hor, Bool &bTestAMP_Ver, Bool &bTestMergeAMP_Hor, Bool &bTestMergeAMP_Ver)
#else
Void TEncCu::deriveTestModeAMP (TComDataCU *pcBestCU, PartSize eParentPartSize, Bool &bTestAMP_Hor, Bool &bTestAMP_Ver)
#endif
{
  const PartSize eBestPartSize = pcBestCU->getPartitionSize(0);
  const Bool     bParentIsAMP  = ( eParentPartSize >= SIZE_2NxnU ) && ( eParentPartSize <= SIZE_nRx2N );

  // The current best partitioning decides which AMP direction(s) to test.
  switch ( eBestPartSize )
  {
    case SIZE_2NxN:
      bTestAMP_Hor = true;
      break;

    case SIZE_Nx2N:
      bTestAMP_Ver = true;
      break;

    case SIZE_2Nx2N:
      // Only when the 2Nx2N winner is neither merged nor skipped.
      if ( !pcBestCU->getMergeFlag(0) && !pcBestCU->isSkipped(0) )
      {
        bTestAMP_Hor = true;
        bTestAMP_Ver = true;
      }
      break;

    default:
      break;
  }

#if AMP_MRG
  //! Utilizing the partition size of parent PU
  if ( bParentIsAMP )
  {
    bTestMergeAMP_Hor = true;
    bTestMergeAMP_Ver = true;
  }

  if ( eParentPartSize == NUMBER_OF_PART_SIZES ) //! if parent is intra
  {
    if ( eBestPartSize == SIZE_2NxN )
    {
      bTestMergeAMP_Hor = true;
    }
    else if ( eBestPartSize == SIZE_Nx2N )
    {
      bTestMergeAMP_Ver = true;
    }
  }

  if ( eBestPartSize == SIZE_2Nx2N && !pcBestCU->isSkipped(0) )
  {
    bTestMergeAMP_Hor = true;
    bTestMergeAMP_Ver = true;
  }

  // Reset: the non-merge AMP tests are switched off when the CU width is 64.
  if ( pcBestCU->getWidth(0) == 64 )
  {
    bTestAMP_Hor = false;
    bTestAMP_Ver = false;
  }
#else
  //! Utilizing the partition size of parent PU
  if ( bParentIsAMP )
  {
    bTestAMP_Hor = true;
    bTestAMP_Ver = true;
  }

  // Reset: a 2Nx2N parent switches the AMP tests off.
  if ( eParentPartSize == SIZE_2Nx2N )
  {
    bTestAMP_Hor = false;
    bTestAMP_Ver = false;
  }
#endif
}
#endif
346 
347 
348 // ====================================================================================================================
349 // Protected member functions
350 // ====================================================================================================================
351 /** Compress a CU block recursively with enabling sub-CTU-level delta QP
352  *\param   rpcBestCU
353  *\param   rpcTempCU
354  *\param   uiDepth
355  *\returns Void
356  *
357  *- for loop of QP value to compress the current CU with all possible QP
358 */
359 #if AMP_ENC_SPEEDUP
xCompressCU(TComDataCU * & rpcBestCU,TComDataCU * & rpcTempCU,UInt uiDepth DEBUG_STRING_FN_DECLARE (sDebug_),PartSize eParentPartSize)360 Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth DEBUG_STRING_FN_DECLARE(sDebug_), PartSize eParentPartSize )
361 #else
362 Void TEncCu::xCompressCU( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth )
363 #endif
364 {
365   TComPic* pcPic = rpcBestCU->getPic();
366   DEBUG_STRING_NEW(sDebug)
367 
368   // get Original YUV data from picture
369   m_ppcOrigYuv[uiDepth]->copyFromPicYuv( pcPic->getPicYuvOrg(), rpcBestCU->getCtuRsAddr(), rpcBestCU->getZorderIdxInCtu() );
370 
371     // variable for Early CU determination
372   Bool    bSubBranch = true;
373 
374   // variable for Cbf fast mode PU decision
375   Bool    doNotBlockPu = true;
376   Bool    earlyDetectionSkipMode = false;
377 
378   Bool bBoundary = false;
379   UInt uiLPelX   = rpcBestCU->getCUPelX();
380   UInt uiRPelX   = uiLPelX + rpcBestCU->getWidth(0)  - 1;
381   UInt uiTPelY   = rpcBestCU->getCUPelY();
382   UInt uiBPelY   = uiTPelY + rpcBestCU->getHeight(0) - 1;
383 
384   Int iBaseQP = xComputeQP( rpcBestCU, uiDepth );
385   Int iMinQP;
386   Int iMaxQP;
387   Bool isAddLowestQP = false;
388 
389   const UInt numberValidComponents = rpcBestCU->getPic()->getNumberValidComponents();
390 
391   if( (g_uiMaxCUWidth>>uiDepth) >= rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() )
392   {
393     Int idQP = m_pcEncCfg->getMaxDeltaQP();
394     iMinQP = Clip3( -rpcTempCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
395     iMaxQP = Clip3( -rpcTempCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
396   }
397   else
398   {
399     iMinQP = rpcTempCU->getQP(0);
400     iMaxQP = rpcTempCU->getQP(0);
401   }
402 
403   if ( m_pcEncCfg->getUseRateCtrl() )
404   {
405     iMinQP = m_pcRateCtrl->getRCQP();
406     iMaxQP = m_pcRateCtrl->getRCQP();
407   }
408 
409   // transquant-bypass (TQB) processing loop variable initialisation ---
410 
411   const Int lowestQP = iMinQP; // For TQB, use this QP which is the lowest non TQB QP tested (rather than QP'=0) - that way delta QPs are smaller, and TQB can be tested at all CU levels.
412 
413   if ( (rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag()) )
414   {
415     isAddLowestQP = true; // mark that the first iteration is to cost TQB mode.
416     iMinQP = iMinQP - 1;  // increase loop variable range by 1, to allow testing of TQB mode along with other QPs
417     if ( m_pcEncCfg->getCUTransquantBypassFlagForceValue() )
418     {
419       iMaxQP = iMinQP;
420     }
421   }
422 
423   TComSlice * pcSlice = rpcTempCU->getPic()->getSlice(rpcTempCU->getPic()->getCurrSliceIdx());
424   // We need to split, so don't try these modes.
425   if ( ( uiRPelX < rpcBestCU->getSlice()->getSPS()->getPicWidthInLumaSamples() ) &&
426        ( uiBPelY < rpcBestCU->getSlice()->getSPS()->getPicHeightInLumaSamples() ) )
427   {
428     for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
429     {
430       const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP);
431 
432       if (bIsLosslessMode)
433       {
434         iQP = lowestQP;
435       }
436 
437       m_ChromaQpAdjIdc = 0;
438       if (pcSlice->getUseChromaQpAdj())
439       {
440         /* Pre-estimation of chroma QP based on input block activity may be performed
441          * here, using for example m_ppcOrigYuv[uiDepth] */
442         /* To exercise the current code, the index used for adjustment is based on
443          * block position
444          */
445         Int lgMinCuSize = pcSlice->getSPS()->getLog2MinCodingBlockSize();
446         m_ChromaQpAdjIdc = ((uiLPelX >> lgMinCuSize) + (uiTPelY >> lgMinCuSize)) % (pcSlice->getPPS()->getChromaQpAdjTableSize() + 1);
447       }
448 
449       rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
450 
451       // do inter modes, SKIP and 2Nx2N
452       if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
453       {
454         // 2Nx2N
455         if(m_pcEncCfg->getUseEarlySkipDetection())
456         {
457           xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
458           rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );//by Competition for inter_2Nx2N
459         }
460         // SKIP
461         xCheckRDCostMerge2Nx2N( rpcBestCU, rpcTempCU DEBUG_STRING_PASS_INTO(sDebug), &earlyDetectionSkipMode );//by Merge for inter_2Nx2N
462         rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
463 
464         if(!m_pcEncCfg->getUseEarlySkipDetection())
465         {
466           // 2Nx2N, NxN
467           xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
468           rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
469           if(m_pcEncCfg->getUseCbfFastMode())
470           {
471             doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
472           }
473         }
474       }
475 
476       if (bIsLosslessMode) // Restore loop variable if lossless mode was searched.
477       {
478         iQP = iMinQP;
479       }
480     }
481 
482     if(!earlyDetectionSkipMode)
483     {
484       for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
485       {
486         const Bool bIsLosslessMode = isAddLowestQP && (iQP == iMinQP); // If lossless, then iQP is irrelevant for subsequent modules.
487 
488         if (bIsLosslessMode)
489         {
490           iQP = lowestQP;
491         }
492 
493         rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
494 
495         // do inter modes, NxN, 2NxN, and Nx2N
496         if( rpcBestCU->getSlice()->getSliceType() != I_SLICE )
497         {
498           // 2Nx2N, NxN
499           if(!( (rpcBestCU->getWidth(0)==8) && (rpcBestCU->getHeight(0)==8) ))
500           {
501             if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth && doNotBlockPu)
502             {
503               xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_NxN DEBUG_STRING_PASS_INTO(sDebug)   );
504               rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
505             }
506           }
507 
508           if(doNotBlockPu)
509           {
510             xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_Nx2N DEBUG_STRING_PASS_INTO(sDebug)  );
511             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
512             if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_Nx2N )
513             {
514               doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
515             }
516           }
517           if(doNotBlockPu)
518           {
519             xCheckRDCostInter      ( rpcBestCU, rpcTempCU, SIZE_2NxN DEBUG_STRING_PASS_INTO(sDebug)  );
520             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
521             if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxN)
522             {
523               doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
524             }
525           }
526 
527           //! Try AMP (SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N, SIZE_nRx2N)
528           if( pcPic->getSlice(0)->getSPS()->getAMPAcc(uiDepth) )
529           {
530 #if AMP_ENC_SPEEDUP
531             Bool bTestAMP_Hor = false, bTestAMP_Ver = false;
532 
533 #if AMP_MRG
534             Bool bTestMergeAMP_Hor = false, bTestMergeAMP_Ver = false;
535 
536             deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver, bTestMergeAMP_Hor, bTestMergeAMP_Ver);
537 #else
538             deriveTestModeAMP (rpcBestCU, eParentPartSize, bTestAMP_Hor, bTestAMP_Ver);
539 #endif
540 
541             //! Do horizontal AMP
542             if ( bTestAMP_Hor )
543             {
544               if(doNotBlockPu)
545               {
546                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU DEBUG_STRING_PASS_INTO(sDebug) );
547                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
548                 if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU )
549                 {
550                   doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
551                 }
552               }
553               if(doNotBlockPu)
554               {
555                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD DEBUG_STRING_PASS_INTO(sDebug) );
556                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
557                 if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD )
558                 {
559                   doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
560                 }
561               }
562             }
563 #if AMP_MRG
564             else if ( bTestMergeAMP_Hor )
565             {
566               if(doNotBlockPu)
567               {
568                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU DEBUG_STRING_PASS_INTO(sDebug), true );
569                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
570                 if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnU )
571                 {
572                   doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
573                 }
574               }
575               if(doNotBlockPu)
576               {
577                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD DEBUG_STRING_PASS_INTO(sDebug), true );
578                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
579                 if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_2NxnD )
580                 {
581                   doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
582                 }
583               }
584             }
585 #endif
586 
587             //! Do vertical AMP
588             if ( bTestAMP_Ver )
589             {
590               if(doNotBlockPu)
591               {
592                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N DEBUG_STRING_PASS_INTO(sDebug) );
593                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
594                 if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N )
595                 {
596                   doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
597                 }
598               }
599               if(doNotBlockPu)
600               {
601                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N DEBUG_STRING_PASS_INTO(sDebug) );
602                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
603               }
604             }
605 #if AMP_MRG
606             else if ( bTestMergeAMP_Ver )
607             {
608               if(doNotBlockPu)
609               {
610                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N DEBUG_STRING_PASS_INTO(sDebug), true );
611                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
612                 if(m_pcEncCfg->getUseCbfFastMode() && rpcBestCU->getPartitionSize(0) == SIZE_nLx2N )
613                 {
614                   doNotBlockPu = rpcBestCU->getQtRootCbf( 0 ) != 0;
615                 }
616               }
617               if(doNotBlockPu)
618               {
619                 xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N DEBUG_STRING_PASS_INTO(sDebug), true );
620                 rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
621               }
622             }
623 #endif
624 
625 #else
626             xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnU );
627             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
628             xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_2NxnD );
629             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
630             xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nLx2N );
631             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
632 
633             xCheckRDCostInter( rpcBestCU, rpcTempCU, SIZE_nRx2N );
634             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
635 
636 #endif
637           }
638         }
639 
640         // do normal intra modes
641         // speedup for inter frames
642         Double intraCost = 0.0;
643 
644         if((rpcBestCU->getSlice()->getSliceType() == I_SLICE)                                     ||
645            (rpcBestCU->getCbf( 0, COMPONENT_Y  ) != 0)                                            ||
646           ((rpcBestCU->getCbf( 0, COMPONENT_Cb ) != 0) && (numberValidComponents > COMPONENT_Cb)) ||
647           ((rpcBestCU->getCbf( 0, COMPONENT_Cr ) != 0) && (numberValidComponents > COMPONENT_Cr))  ) // avoid very complex intra if it is unlikely
648         {
649           xCheckRDCostIntra( rpcBestCU, rpcTempCU, intraCost, SIZE_2Nx2N DEBUG_STRING_PASS_INTO(sDebug) );
650           rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
651           if( uiDepth == g_uiMaxCUDepth - g_uiAddCUDepth )
652           {
653             if( rpcTempCU->getWidth(0) > ( 1 << rpcTempCU->getSlice()->getSPS()->getQuadtreeTULog2MinSize() ) )
654             {
655               Double tmpIntraCost;
656               xCheckRDCostIntra( rpcBestCU, rpcTempCU, tmpIntraCost, SIZE_NxN DEBUG_STRING_PASS_INTO(sDebug)   );
657               intraCost = std::min(intraCost, tmpIntraCost);
658               rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
659             }
660           }
661         }
662 
663         // test PCM
664         if(pcPic->getSlice(0)->getSPS()->getUsePCM()
665           && rpcTempCU->getWidth(0) <= (1<<pcPic->getSlice(0)->getSPS()->getPCMLog2MaxSize())
666           && rpcTempCU->getWidth(0) >= (1<<pcPic->getSlice(0)->getSPS()->getPCMLog2MinSize()) )
667         {
668           UInt uiRawBits = getTotalBits(rpcBestCU->getWidth(0), rpcBestCU->getHeight(0), rpcBestCU->getPic()->getChromaFormat(), g_bitDepth);
669           UInt uiBestBits = rpcBestCU->getTotalBits();
670           if((uiBestBits > uiRawBits) || (rpcBestCU->getTotalCost() > m_pcRdCost->calcRdCost(uiRawBits, 0)))
671           {
672             xCheckIntraPCM (rpcBestCU, rpcTempCU);
673             rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
674           }
675         }
676 
677         if (bIsLosslessMode) // Restore loop variable if lossless mode was searched.
678         {
679           iQP = iMinQP;
680         }
681       }
682     }
683 
684     m_pcEntropyCoder->resetBits();
685     m_pcEntropyCoder->encodeSplitFlag( rpcBestCU, 0, uiDepth, true );
686     rpcBestCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
687     rpcBestCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
688     rpcBestCU->getTotalCost()  = m_pcRdCost->calcRdCost( rpcBestCU->getTotalBits(), rpcBestCU->getTotalDistortion() );
689 
690     // Early CU determination
691     if( m_pcEncCfg->getUseEarlyCU() && rpcBestCU->isSkipped(0) )
692     {
693       bSubBranch = false;
694     }
695     else
696     {
697       bSubBranch = true;
698     }
699   }
700   else
701   {
702     bBoundary = true;
703   }
704 
705   // copy original YUV samples to PCM buffer
706   if( rpcBestCU->isLosslessCoded(0) && (rpcBestCU->getIPCMFlag(0) == false))
707   {
708     xFillPCMBuffer(rpcBestCU, m_ppcOrigYuv[uiDepth]);
709   }
710 
711   if( (g_uiMaxCUWidth>>uiDepth) == rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() )
712   {
713     Int idQP = m_pcEncCfg->getMaxDeltaQP();
714     iMinQP = Clip3( -rpcTempCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP-idQP );
715     iMaxQP = Clip3( -rpcTempCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQP+idQP );
716   }
717   else if( (g_uiMaxCUWidth>>uiDepth) > rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() )
718   {
719     iMinQP = iBaseQP;
720     iMaxQP = iBaseQP;
721   }
722   else
723   {
724     const Int iStartQP = rpcTempCU->getQP(0);
725     iMinQP = iStartQP;
726     iMaxQP = iStartQP;
727   }
728 
729   if ( m_pcEncCfg->getUseRateCtrl() )
730   {
731     iMinQP = m_pcRateCtrl->getRCQP();
732     iMaxQP = m_pcRateCtrl->getRCQP();
733   }
734 
735   if ( m_pcEncCfg->getCUTransquantBypassFlagForceValue() )
736   {
737     iMaxQP = iMinQP; // If all TUs are forced into using transquant bypass, do not loop here.
738   }
739 
740   for (Int iQP=iMinQP; iQP<=iMaxQP; iQP++)
741   {
742     const Bool bIsLosslessMode = false; // False at this level. Next level down may set it to true.
743 
744     rpcTempCU->initEstData( uiDepth, iQP, bIsLosslessMode );
745 
746     // further split
747     if( bSubBranch && uiDepth < g_uiMaxCUDepth - g_uiAddCUDepth )
748     {
749       UChar       uhNextDepth         = uiDepth+1;
750       TComDataCU* pcSubBestPartCU     = m_ppcBestCU[uhNextDepth];
751       TComDataCU* pcSubTempPartCU     = m_ppcTempCU[uhNextDepth];
752       DEBUG_STRING_NEW(sTempDebug)
753 
754       for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++ )
755       {
756         pcSubBestPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP );           // clear sub partition datas or init.
757         pcSubTempPartCU->initSubCU( rpcTempCU, uiPartUnitIdx, uhNextDepth, iQP );           // clear sub partition datas or init.
758 
759         if( ( pcSubBestPartCU->getCUPelX() < pcSlice->getSPS()->getPicWidthInLumaSamples() ) && ( pcSubBestPartCU->getCUPelY() < pcSlice->getSPS()->getPicHeightInLumaSamples() ) )
760         {
761           if ( 0 == uiPartUnitIdx) //initialize RD with previous depth buffer
762           {
763             m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
764           }
765           else
766           {
767             m_pppcRDSbacCoder[uhNextDepth][CI_CURR_BEST]->load(m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]);
768           }
769 
770 #if AMP_ENC_SPEEDUP
771           DEBUG_STRING_NEW(sChild)
772           if ( !rpcBestCU->isInter(0) )
773           {
774             xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth DEBUG_STRING_PASS_INTO(sChild), NUMBER_OF_PART_SIZES );
775           }
776           else
777           {
778 
779             xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth DEBUG_STRING_PASS_INTO(sChild), rpcBestCU->getPartitionSize(0) );
780           }
781           DEBUG_STRING_APPEND(sTempDebug, sChild)
782 #else
783           xCompressCU( pcSubBestPartCU, pcSubTempPartCU, uhNextDepth );
784 #endif
785 
786           rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth );         // Keep best part data to current temporary data.
787           xCopyYuv2Tmp( pcSubBestPartCU->getTotalNumPart()*uiPartUnitIdx, uhNextDepth );
788         }
789         else
790         {
791           pcSubBestPartCU->copyToPic( uhNextDepth );
792           rpcTempCU->copyPartFrom( pcSubBestPartCU, uiPartUnitIdx, uhNextDepth );
793         }
794       }
795 
796       if( !bBoundary )
797       {
798         m_pcEntropyCoder->resetBits();
799         m_pcEntropyCoder->encodeSplitFlag( rpcTempCU, 0, uiDepth, true );
800 
801         rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // split bits
802         rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
803       }
804       rpcTempCU->getTotalCost()  = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
805 
806       if( (g_uiMaxCUWidth>>uiDepth) == rpcTempCU->getSlice()->getPPS()->getMinCuDQPSize() && rpcTempCU->getSlice()->getPPS()->getUseDQP())
807       {
808         Bool hasResidual = false;
809         for( UInt uiBlkIdx = 0; uiBlkIdx < rpcTempCU->getTotalNumPart(); uiBlkIdx ++)
810         {
811           if( (     rpcTempCU->getCbf(uiBlkIdx, COMPONENT_Y)
812                 || (rpcTempCU->getCbf(uiBlkIdx, COMPONENT_Cb) && (numberValidComponents > COMPONENT_Cb))
813                 || (rpcTempCU->getCbf(uiBlkIdx, COMPONENT_Cr) && (numberValidComponents > COMPONENT_Cr)) ) )
814           {
815             hasResidual = true;
816             break;
817           }
818         }
819 
820         UInt uiTargetPartIdx = 0;
821         if ( hasResidual )
822         {
823 #if !RDO_WITHOUT_DQP_BITS
824           m_pcEntropyCoder->resetBits();
825           m_pcEntropyCoder->encodeQP( rpcTempCU, uiTargetPartIdx, false );
826           rpcTempCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // dQP bits
827           rpcTempCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
828           rpcTempCU->getTotalCost()  = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
829 #endif
830 
831           Bool foundNonZeroCbf = false;
832           rpcTempCU->setQPSubCUs( rpcTempCU->getRefQP( uiTargetPartIdx ), 0, uiDepth, foundNonZeroCbf );
833           assert( foundNonZeroCbf );
834         }
835         else
836         {
837           rpcTempCU->setQPSubParts( rpcTempCU->getRefQP( uiTargetPartIdx ), 0, uiDepth ); // set QP to default QP
838         }
839       }
840 
841       m_pppcRDSbacCoder[uhNextDepth][CI_NEXT_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
842 
843       // TODO: this does not account for the slice bytes already written. See other instances of FIXED_NUMBER_OF_BYTES
844       Bool isEndOfSlice        = rpcBestCU->getSlice()->getSliceMode()==FIXED_NUMBER_OF_BYTES
845                                  && (rpcBestCU->getTotalBits()>rpcBestCU->getSlice()->getSliceArgument()<<3);
846       Bool isEndOfSliceSegment = rpcBestCU->getSlice()->getSliceSegmentMode()==FIXED_NUMBER_OF_BYTES
847                                  && (rpcBestCU->getTotalBits()>rpcBestCU->getSlice()->getSliceSegmentArgument()<<3);
848       if(isEndOfSlice||isEndOfSliceSegment)
849       {
850         if (m_pcEncCfg->getCostMode()==COST_MIXED_LOSSLESS_LOSSY_CODING)
851           rpcBestCU->getTotalCost()=rpcTempCU->getTotalCost() + (1.0 / m_pcRdCost->getLambda());
852         else
853           rpcBestCU->getTotalCost()=rpcTempCU->getTotalCost()+1;
854       }
855 
856       xCheckBestMode( rpcBestCU, rpcTempCU, uiDepth DEBUG_STRING_PASS_INTO(sDebug) DEBUG_STRING_PASS_INTO(sTempDebug) DEBUG_STRING_PASS_INTO(false) ); // RD compare current larger prediction
857                                                                                        // with sub partitioned prediction.
858     }
859   }
860 
861   DEBUG_STRING_APPEND(sDebug_, sDebug);
862 
863   rpcBestCU->copyToPic(uiDepth);                                                     // Copy Best data to Picture for next partition prediction.
864 
865   xCopyYuv2Pic( rpcBestCU->getPic(), rpcBestCU->getCtuRsAddr(), rpcBestCU->getZorderIdxInCtu(), uiDepth, uiDepth, rpcBestCU, uiLPelX, uiTPelY );   // Copy Yuv data to picture Yuv
866   if (bBoundary)
867   {
868     return;
869   }
870 
871   // Assert if Best prediction mode is NONE
872   // Selected mode's RD-cost must be not MAX_DOUBLE.
873   assert( rpcBestCU->getPartitionSize ( 0 ) != NUMBER_OF_PART_SIZES       );
874   assert( rpcBestCU->getPredictionMode( 0 ) != NUMBER_OF_PREDICTION_MODES );
875   assert( rpcBestCU->getTotalCost     (   ) != MAX_DOUBLE                 );
876 }
877 
878 /** finish encoding a cu and handle end-of-slice conditions
879  * \param pcCU
880  * \param uiAbsPartIdx
881  * \param uiDepth
882  * \returns Void
883  */
finishCU(TComDataCU * pcCU,UInt uiAbsPartIdx,UInt uiDepth)884 Void TEncCu::finishCU( TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth )
885 {
886   TComPic* pcPic = pcCU->getPic();
887   TComSlice * pcSlice = pcCU->getPic()->getSlice(pcCU->getPic()->getCurrSliceIdx());
888 
889   //Calculate end address
890   const Int  currentCTUTsAddr = pcPic->getPicSym()->getCtuRsToTsAddrMap(pcCU->getCtuRsAddr());
891   const Bool isLastSubCUOfCtu = pcCU->isLastSubCUOfCtu(uiAbsPartIdx);
892   if ( isLastSubCUOfCtu )
893   {
894     // The 1-terminating bit is added to all streams, so don't add it here when it's 1.
895     // i.e. when the slice segment CurEnd CTU address is the current CTU address+1.
896     if (pcSlice->getSliceSegmentCurEndCtuTsAddr() != currentCTUTsAddr+1)
897     {
898       m_pcEntropyCoder->encodeTerminatingBit( 0 );
899     }
900   }
901 }
902 
903 /** Compute QP for each CU
904  * \param pcCU Target CU
905  * \param uiDepth CU depth
906  * \returns quantization parameter
907  */
xComputeQP(TComDataCU * pcCU,UInt uiDepth)908 Int TEncCu::xComputeQP( TComDataCU* pcCU, UInt uiDepth )
909 {
910   Int iBaseQp = pcCU->getSlice()->getSliceQp();
911   Int iQpOffset = 0;
912   if ( m_pcEncCfg->getUseAdaptiveQP() )
913   {
914     TEncPic* pcEPic = dynamic_cast<TEncPic*>( pcCU->getPic() );
915     UInt uiAQDepth = min( uiDepth, pcEPic->getMaxAQDepth()-1 );
916     TEncPicQPAdaptationLayer* pcAQLayer = pcEPic->getAQLayer( uiAQDepth );
917     UInt uiAQUPosX = pcCU->getCUPelX() / pcAQLayer->getAQPartWidth();
918     UInt uiAQUPosY = pcCU->getCUPelY() / pcAQLayer->getAQPartHeight();
919     UInt uiAQUStride = pcAQLayer->getAQPartStride();
920     TEncQPAdaptationUnit* acAQU = pcAQLayer->getQPAdaptationUnit();
921 
922     Double dMaxQScale = pow(2.0, m_pcEncCfg->getQPAdaptationRange()/6.0);
923     Double dAvgAct = pcAQLayer->getAvgActivity();
924     Double dCUAct = acAQU[uiAQUPosY * uiAQUStride + uiAQUPosX].getActivity();
925     Double dNormAct = (dMaxQScale*dCUAct + dAvgAct) / (dCUAct + dMaxQScale*dAvgAct);
926     Double dQpOffset = log(dNormAct) / log(2.0) * 6.0;
927     iQpOffset = Int(floor( dQpOffset + 0.49999 ));
928   }
929 
930   return Clip3(-pcCU->getSlice()->getSPS()->getQpBDOffset(CHANNEL_TYPE_LUMA), MAX_QP, iBaseQp+iQpOffset );
931 }
932 
933 /** encode a CU block recursively
934  * \param pcCU
935  * \param uiAbsPartIdx
936  * \param uiDepth
937  * \returns Void
938  */
xEncodeCU(TComDataCU * pcCU,UInt uiAbsPartIdx,UInt uiDepth)939 Void TEncCu::xEncodeCU( TComDataCU* pcCU, UInt uiAbsPartIdx, UInt uiDepth )
940 {
941   TComPic* pcPic = pcCU->getPic();
942 
943   Bool bBoundary = false;
944   UInt uiLPelX   = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
945   UInt uiRPelX   = uiLPelX + (g_uiMaxCUWidth>>uiDepth)  - 1;
946   UInt uiTPelY   = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
947   UInt uiBPelY   = uiTPelY + (g_uiMaxCUHeight>>uiDepth) - 1;
948 
949   TComSlice * pcSlice = pcCU->getPic()->getSlice(pcCU->getPic()->getCurrSliceIdx());
950   if( ( uiRPelX < pcSlice->getSPS()->getPicWidthInLumaSamples() ) && ( uiBPelY < pcSlice->getSPS()->getPicHeightInLumaSamples() ) )
951   {
952     m_pcEntropyCoder->encodeSplitFlag( pcCU, uiAbsPartIdx, uiDepth );
953   }
954   else
955   {
956     bBoundary = true;
957   }
958 
959   if( ( ( uiDepth < pcCU->getDepth( uiAbsPartIdx ) ) && ( uiDepth < (g_uiMaxCUDepth-g_uiAddCUDepth) ) ) || bBoundary )
960   {
961     UInt uiQNumParts = ( pcPic->getNumPartitionsInCtu() >> (uiDepth<<1) )>>2;
962     if( (g_uiMaxCUWidth>>uiDepth) == pcCU->getSlice()->getPPS()->getMinCuDQPSize() && pcCU->getSlice()->getPPS()->getUseDQP())
963     {
964       setdQPFlag(true);
965     }
966 
967     if( (g_uiMaxCUWidth>>uiDepth) == pcCU->getSlice()->getPPS()->getMinCuChromaQpAdjSize() && pcCU->getSlice()->getUseChromaQpAdj())
968     {
969       setCodeChromaQpAdjFlag(true);
970     }
971 
972     for ( UInt uiPartUnitIdx = 0; uiPartUnitIdx < 4; uiPartUnitIdx++, uiAbsPartIdx+=uiQNumParts )
973     {
974       uiLPelX   = pcCU->getCUPelX() + g_auiRasterToPelX[ g_auiZscanToRaster[uiAbsPartIdx] ];
975       uiTPelY   = pcCU->getCUPelY() + g_auiRasterToPelY[ g_auiZscanToRaster[uiAbsPartIdx] ];
976       if( ( uiLPelX < pcSlice->getSPS()->getPicWidthInLumaSamples() ) && ( uiTPelY < pcSlice->getSPS()->getPicHeightInLumaSamples() ) )
977       {
978         xEncodeCU( pcCU, uiAbsPartIdx, uiDepth+1 );
979       }
980     }
981     return;
982   }
983 
984   if( (g_uiMaxCUWidth>>uiDepth) >= pcCU->getSlice()->getPPS()->getMinCuDQPSize() && pcCU->getSlice()->getPPS()->getUseDQP())
985   {
986     setdQPFlag(true);
987   }
988 
989   if( (g_uiMaxCUWidth>>uiDepth) >= pcCU->getSlice()->getPPS()->getMinCuChromaQpAdjSize() && pcCU->getSlice()->getUseChromaQpAdj())
990   {
991     setCodeChromaQpAdjFlag(true);
992   }
993 
994   if (pcCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
995   {
996     m_pcEntropyCoder->encodeCUTransquantBypassFlag( pcCU, uiAbsPartIdx );
997   }
998 
999   if( !pcCU->getSlice()->isIntra() )
1000   {
1001     m_pcEntropyCoder->encodeSkipFlag( pcCU, uiAbsPartIdx );
1002   }
1003 
1004   if( pcCU->isSkipped( uiAbsPartIdx ) )
1005   {
1006     m_pcEntropyCoder->encodeMergeIndex( pcCU, uiAbsPartIdx );
1007     finishCU(pcCU,uiAbsPartIdx,uiDepth);
1008     return;
1009   }
1010 
1011   m_pcEntropyCoder->encodePredMode( pcCU, uiAbsPartIdx );
1012   m_pcEntropyCoder->encodePartSize( pcCU, uiAbsPartIdx, uiDepth );
1013 
1014   if (pcCU->isIntra( uiAbsPartIdx ) && pcCU->getPartitionSize( uiAbsPartIdx ) == SIZE_2Nx2N )
1015   {
1016     m_pcEntropyCoder->encodeIPCMInfo( pcCU, uiAbsPartIdx );
1017 
1018     if(pcCU->getIPCMFlag(uiAbsPartIdx))
1019     {
1020       // Encode slice finish
1021       finishCU(pcCU,uiAbsPartIdx,uiDepth);
1022       return;
1023     }
1024   }
1025 
1026   // prediction Info ( Intra : direction mode, Inter : Mv, reference idx )
1027   m_pcEntropyCoder->encodePredInfo( pcCU, uiAbsPartIdx );
1028 
1029   // Encode Coefficients
1030   Bool bCodeDQP = getdQPFlag();
1031   Bool codeChromaQpAdj = getCodeChromaQpAdjFlag();
1032   m_pcEntropyCoder->encodeCoeff( pcCU, uiAbsPartIdx, uiDepth, bCodeDQP, codeChromaQpAdj );
1033   setCodeChromaQpAdjFlag( codeChromaQpAdj );
1034   setdQPFlag( bCodeDQP );
1035 
1036   // --- write terminating bit ---
1037   finishCU(pcCU,uiAbsPartIdx,uiDepth);
1038 }
1039 
xCalcHADs8x8_ISlice(Pel * piOrg,Int iStrideOrg)1040 Int xCalcHADs8x8_ISlice(Pel *piOrg, Int iStrideOrg)
1041 {
1042   Int k, i, j, jj;
1043   Int diff[64], m1[8][8], m2[8][8], m3[8][8], iSumHad = 0;
1044 
1045   for( k = 0; k < 64; k += 8 )
1046   {
1047     diff[k+0] = piOrg[0] ;
1048     diff[k+1] = piOrg[1] ;
1049     diff[k+2] = piOrg[2] ;
1050     diff[k+3] = piOrg[3] ;
1051     diff[k+4] = piOrg[4] ;
1052     diff[k+5] = piOrg[5] ;
1053     diff[k+6] = piOrg[6] ;
1054     diff[k+7] = piOrg[7] ;
1055 
1056     piOrg += iStrideOrg;
1057   }
1058 
1059   //horizontal
1060   for (j=0; j < 8; j++)
1061   {
1062     jj = j << 3;
1063     m2[j][0] = diff[jj  ] + diff[jj+4];
1064     m2[j][1] = diff[jj+1] + diff[jj+5];
1065     m2[j][2] = diff[jj+2] + diff[jj+6];
1066     m2[j][3] = diff[jj+3] + diff[jj+7];
1067     m2[j][4] = diff[jj  ] - diff[jj+4];
1068     m2[j][5] = diff[jj+1] - diff[jj+5];
1069     m2[j][6] = diff[jj+2] - diff[jj+6];
1070     m2[j][7] = diff[jj+3] - diff[jj+7];
1071 
1072     m1[j][0] = m2[j][0] + m2[j][2];
1073     m1[j][1] = m2[j][1] + m2[j][3];
1074     m1[j][2] = m2[j][0] - m2[j][2];
1075     m1[j][3] = m2[j][1] - m2[j][3];
1076     m1[j][4] = m2[j][4] + m2[j][6];
1077     m1[j][5] = m2[j][5] + m2[j][7];
1078     m1[j][6] = m2[j][4] - m2[j][6];
1079     m1[j][7] = m2[j][5] - m2[j][7];
1080 
1081     m2[j][0] = m1[j][0] + m1[j][1];
1082     m2[j][1] = m1[j][0] - m1[j][1];
1083     m2[j][2] = m1[j][2] + m1[j][3];
1084     m2[j][3] = m1[j][2] - m1[j][3];
1085     m2[j][4] = m1[j][4] + m1[j][5];
1086     m2[j][5] = m1[j][4] - m1[j][5];
1087     m2[j][6] = m1[j][6] + m1[j][7];
1088     m2[j][7] = m1[j][6] - m1[j][7];
1089   }
1090 
1091   //vertical
1092   for (i=0; i < 8; i++)
1093   {
1094     m3[0][i] = m2[0][i] + m2[4][i];
1095     m3[1][i] = m2[1][i] + m2[5][i];
1096     m3[2][i] = m2[2][i] + m2[6][i];
1097     m3[3][i] = m2[3][i] + m2[7][i];
1098     m3[4][i] = m2[0][i] - m2[4][i];
1099     m3[5][i] = m2[1][i] - m2[5][i];
1100     m3[6][i] = m2[2][i] - m2[6][i];
1101     m3[7][i] = m2[3][i] - m2[7][i];
1102 
1103     m1[0][i] = m3[0][i] + m3[2][i];
1104     m1[1][i] = m3[1][i] + m3[3][i];
1105     m1[2][i] = m3[0][i] - m3[2][i];
1106     m1[3][i] = m3[1][i] - m3[3][i];
1107     m1[4][i] = m3[4][i] + m3[6][i];
1108     m1[5][i] = m3[5][i] + m3[7][i];
1109     m1[6][i] = m3[4][i] - m3[6][i];
1110     m1[7][i] = m3[5][i] - m3[7][i];
1111 
1112     m2[0][i] = m1[0][i] + m1[1][i];
1113     m2[1][i] = m1[0][i] - m1[1][i];
1114     m2[2][i] = m1[2][i] + m1[3][i];
1115     m2[3][i] = m1[2][i] - m1[3][i];
1116     m2[4][i] = m1[4][i] + m1[5][i];
1117     m2[5][i] = m1[4][i] - m1[5][i];
1118     m2[6][i] = m1[6][i] + m1[7][i];
1119     m2[7][i] = m1[6][i] - m1[7][i];
1120   }
1121 
1122   for (i = 0; i < 8; i++)
1123   {
1124     for (j = 0; j < 8; j++)
1125     {
1126       iSumHad += abs(m2[i][j]);
1127     }
1128   }
1129   iSumHad -= abs(m2[0][0]);
1130   iSumHad =(iSumHad+2)>>2;
1131   return(iSumHad);
1132 }
1133 
updateCtuDataISlice(TComDataCU * pCtu,Int width,Int height)1134 Int  TEncCu::updateCtuDataISlice(TComDataCU* pCtu, Int width, Int height)
1135 {
1136   Int  xBl, yBl;
1137   const Int iBlkSize = 8;
1138 
1139   Pel* pOrgInit   = pCtu->getPic()->getPicYuvOrg()->getAddr(COMPONENT_Y, pCtu->getCtuRsAddr(), 0);
1140   Int  iStrideOrig = pCtu->getPic()->getPicYuvOrg()->getStride(COMPONENT_Y);
1141   Pel  *pOrg;
1142 
1143   Int iSumHad = 0;
1144   for ( yBl=0; (yBl+iBlkSize)<=height; yBl+= iBlkSize)
1145   {
1146     for ( xBl=0; (xBl+iBlkSize)<=width; xBl+= iBlkSize)
1147     {
1148       pOrg = pOrgInit + iStrideOrig*yBl + xBl;
1149       iSumHad += xCalcHADs8x8_ISlice(pOrg, iStrideOrig);
1150     }
1151   }
1152   return(iSumHad);
1153 }
1154 
1155 /** check RD costs for a CU block encoded with merge
1156  * \param rpcBestCU
1157  * \param rpcTempCU
1158  * \returns Void
1159  */
xCheckRDCostMerge2Nx2N(TComDataCU * & rpcBestCU,TComDataCU * & rpcTempCU DEBUG_STRING_FN_DECLARE (sDebug),Bool * earlyDetectionSkipMode)1160 Void TEncCu::xCheckRDCostMerge2Nx2N( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU DEBUG_STRING_FN_DECLARE(sDebug), Bool *earlyDetectionSkipMode )
1161 {
1162   assert( rpcTempCU->getSlice()->getSliceType() != I_SLICE );
1163   TComMvField  cMvFieldNeighbours[2 * MRG_MAX_NUM_CANDS]; // double length for mv of both lists
1164   UChar uhInterDirNeighbours[MRG_MAX_NUM_CANDS];
1165   Int numValidMergeCand = 0;
1166   const Bool bTransquantBypassFlag = rpcTempCU->getCUTransquantBypass(0);
1167 
1168   for( UInt ui = 0; ui < rpcTempCU->getSlice()->getMaxNumMergeCand(); ++ui )
1169   {
1170     uhInterDirNeighbours[ui] = 0;
1171   }
1172   UChar uhDepth = rpcTempCU->getDepth( 0 );
1173   rpcTempCU->setPartSizeSubParts( SIZE_2Nx2N, 0, uhDepth ); // interprets depth relative to CTU level
1174   rpcTempCU->getInterMergeCandidates( 0, 0, cMvFieldNeighbours,uhInterDirNeighbours, numValidMergeCand );
1175 
1176   Int mergeCandBuffer[MRG_MAX_NUM_CANDS];
1177   for( UInt ui = 0; ui < numValidMergeCand; ++ui )
1178   {
1179     mergeCandBuffer[ui] = 0;
1180   }
1181 
1182   Bool bestIsSkip = false;
1183 
1184   UInt iteration;
1185   if ( rpcTempCU->isLosslessCoded(0))
1186   {
1187     iteration = 1;
1188   }
1189   else
1190   {
1191     iteration = 2;
1192   }
1193   DEBUG_STRING_NEW(bestStr)
1194 
1195   for( UInt uiNoResidual = 0; uiNoResidual < iteration; ++uiNoResidual )
1196   {
1197     for( UInt uiMergeCand = 0; uiMergeCand < numValidMergeCand; ++uiMergeCand )
1198     {
1199       if(!(uiNoResidual==1 && mergeCandBuffer[uiMergeCand]==1))
1200       {
1201         if( !(bestIsSkip && uiNoResidual == 0) )
1202         {
1203           DEBUG_STRING_NEW(tmpStr)
1204           // set MC parameters
1205           rpcTempCU->setPredModeSubParts( MODE_INTER, 0, uhDepth ); // interprets depth relative to CTU level
1206           rpcTempCU->setCUTransquantBypassSubParts( bTransquantBypassFlag, 0, uhDepth );
1207           rpcTempCU->setChromaQpAdjSubParts( bTransquantBypassFlag ? 0 : m_ChromaQpAdjIdc, 0, uhDepth );
1208           rpcTempCU->setPartSizeSubParts( SIZE_2Nx2N, 0, uhDepth ); // interprets depth relative to CTU level
1209           rpcTempCU->setMergeFlagSubParts( true, 0, 0, uhDepth ); // interprets depth relative to CTU level
1210           rpcTempCU->setMergeIndexSubParts( uiMergeCand, 0, 0, uhDepth ); // interprets depth relative to CTU level
1211           rpcTempCU->setInterDirSubParts( uhInterDirNeighbours[uiMergeCand], 0, 0, uhDepth ); // interprets depth relative to CTU level
1212           rpcTempCU->getCUMvField( REF_PIC_LIST_0 )->setAllMvField( cMvFieldNeighbours[0 + 2*uiMergeCand], SIZE_2Nx2N, 0, 0 ); // interprets depth relative to rpcTempCU level
1213           rpcTempCU->getCUMvField( REF_PIC_LIST_1 )->setAllMvField( cMvFieldNeighbours[1 + 2*uiMergeCand], SIZE_2Nx2N, 0, 0 ); // interprets depth relative to rpcTempCU level
1214 
1215           // do MC
1216           m_pcPredSearch->motionCompensation ( rpcTempCU, m_ppcPredYuvTemp[uhDepth] );
1217           // estimate residual and encode everything
1218           m_pcPredSearch->encodeResAndCalcRdInterCU( rpcTempCU,
1219                                                      m_ppcOrigYuv    [uhDepth],
1220                                                      m_ppcPredYuvTemp[uhDepth],
1221                                                      m_ppcResiYuvTemp[uhDepth],
1222                                                      m_ppcResiYuvBest[uhDepth],
1223                                                      m_ppcRecoYuvTemp[uhDepth],
1224                                                      (uiNoResidual != 0) DEBUG_STRING_PASS_INTO(tmpStr) );
1225 
1226 #ifdef DEBUG_STRING
1227           DebugInterPredResiReco(tmpStr, *(m_ppcPredYuvTemp[uhDepth]), *(m_ppcResiYuvBest[uhDepth]), *(m_ppcRecoYuvTemp[uhDepth]), DebugStringGetPredModeMask(rpcTempCU->getPredictionMode(0)));
1228 #endif
1229 
1230           if ((uiNoResidual == 0) && (rpcTempCU->getQtRootCbf(0) == 0))
1231           {
1232             // If no residual when allowing for one, then set mark to not try case where residual is forced to 0
1233             mergeCandBuffer[uiMergeCand] = 1;
1234           }
1235 
1236           rpcTempCU->setSkipFlagSubParts( rpcTempCU->getQtRootCbf(0) == 0, 0, uhDepth );
1237           Int orgQP = rpcTempCU->getQP( 0 );
1238           xCheckDQP( rpcTempCU );
1239           xCheckBestMode(rpcBestCU, rpcTempCU, uhDepth DEBUG_STRING_PASS_INTO(bestStr) DEBUG_STRING_PASS_INTO(tmpStr));
1240 
1241           rpcTempCU->initEstData( uhDepth, orgQP, bTransquantBypassFlag );
1242 
1243           if( m_pcEncCfg->getUseFastDecisionForMerge() && !bestIsSkip )
1244           {
1245             bestIsSkip = rpcBestCU->getQtRootCbf(0) == 0;
1246           }
1247         }
1248       }
1249     }
1250 
1251     if(uiNoResidual == 0 && m_pcEncCfg->getUseEarlySkipDetection())
1252     {
1253       if(rpcBestCU->getQtRootCbf( 0 ) == 0)
1254       {
1255         if( rpcBestCU->getMergeFlag( 0 ))
1256         {
1257           *earlyDetectionSkipMode = true;
1258         }
1259         else if(m_pcEncCfg->getFastSearch() != SELECTIVE)
1260         {
1261           Int absoulte_MV=0;
1262           for ( UInt uiRefListIdx = 0; uiRefListIdx < 2; uiRefListIdx++ )
1263           {
1264             if ( rpcBestCU->getSlice()->getNumRefIdx( RefPicList( uiRefListIdx ) ) > 0 )
1265             {
1266               TComCUMvField* pcCUMvField = rpcBestCU->getCUMvField(RefPicList( uiRefListIdx ));
1267               Int iHor = pcCUMvField->getMvd( 0 ).getAbsHor();
1268               Int iVer = pcCUMvField->getMvd( 0 ).getAbsVer();
1269               absoulte_MV+=iHor+iVer;
1270             }
1271           }
1272 
1273           if(absoulte_MV == 0)
1274           {
1275             *earlyDetectionSkipMode = true;
1276           }
1277         }
1278       }
1279     }
1280   }
1281   DEBUG_STRING_APPEND(sDebug, bestStr)
1282 }
1283 
1284 
1285 #if AMP_MRG
xCheckRDCostInter(TComDataCU * & rpcBestCU,TComDataCU * & rpcTempCU,PartSize ePartSize DEBUG_STRING_FN_DECLARE (sDebug),Bool bUseMRG)1286 Void TEncCu::xCheckRDCostInter( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize DEBUG_STRING_FN_DECLARE(sDebug), Bool bUseMRG)
1287 #else
1288 Void TEncCu::xCheckRDCostInter( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, PartSize ePartSize )
1289 #endif
1290 {
1291   DEBUG_STRING_NEW(sTest)
1292 
1293   UChar uhDepth = rpcTempCU->getDepth( 0 );
1294 
1295   rpcTempCU->setDepthSubParts( uhDepth, 0 );
1296 
1297   rpcTempCU->setSkipFlagSubParts( false, 0, uhDepth );
1298 
1299   rpcTempCU->setPartSizeSubParts  ( ePartSize,  0, uhDepth );
1300   rpcTempCU->setPredModeSubParts  ( MODE_INTER, 0, uhDepth );
1301   rpcTempCU->setChromaQpAdjSubParts( rpcTempCU->getCUTransquantBypass(0) ? 0 : m_ChromaQpAdjIdc, 0, uhDepth );
1302 
1303 #if AMP_MRG
1304   rpcTempCU->setMergeAMP (true);
1305   m_pcPredSearch->predInterSearch ( rpcTempCU, m_ppcOrigYuv[uhDepth], m_ppcPredYuvTemp[uhDepth], m_ppcResiYuvTemp[uhDepth], m_ppcRecoYuvTemp[uhDepth] DEBUG_STRING_PASS_INTO(sTest), false, bUseMRG );
1306 #else
1307   m_pcPredSearch->predInterSearch ( rpcTempCU, m_ppcOrigYuv[uhDepth], m_ppcPredYuvTemp[uhDepth], m_ppcResiYuvTemp[uhDepth], m_ppcRecoYuvTemp[uhDepth] );
1308 #endif
1309 
1310 #if AMP_MRG
1311   if ( !rpcTempCU->getMergeAMP() )
1312   {
1313     return;
1314   }
1315 #endif
1316 
1317   m_pcPredSearch->encodeResAndCalcRdInterCU( rpcTempCU, m_ppcOrigYuv[uhDepth], m_ppcPredYuvTemp[uhDepth], m_ppcResiYuvTemp[uhDepth], m_ppcResiYuvBest[uhDepth], m_ppcRecoYuvTemp[uhDepth], false DEBUG_STRING_PASS_INTO(sTest) );
1318   rpcTempCU->getTotalCost()  = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
1319 
1320 #ifdef DEBUG_STRING
1321   DebugInterPredResiReco(sTest, *(m_ppcPredYuvTemp[uhDepth]), *(m_ppcResiYuvBest[uhDepth]), *(m_ppcRecoYuvTemp[uhDepth]), DebugStringGetPredModeMask(rpcTempCU->getPredictionMode(0)));
1322 #endif
1323 
1324   xCheckDQP( rpcTempCU );
1325   xCheckBestMode(rpcBestCU, rpcTempCU, uhDepth DEBUG_STRING_PASS_INTO(sDebug) DEBUG_STRING_PASS_INTO(sTest));
1326 }
1327 
xCheckRDCostIntra(TComDataCU * & rpcBestCU,TComDataCU * & rpcTempCU,Double & cost,PartSize eSize DEBUG_STRING_FN_DECLARE (sDebug))1328 Void TEncCu::xCheckRDCostIntra( TComDataCU *&rpcBestCU,
1329                                 TComDataCU *&rpcTempCU,
1330                                 Double      &cost,
1331                                 PartSize     eSize
1332                                 DEBUG_STRING_FN_DECLARE(sDebug) )
1333 {
1334   DEBUG_STRING_NEW(sTest)
1335 
1336   UInt uiDepth = rpcTempCU->getDepth( 0 );
1337 
1338   rpcTempCU->setSkipFlagSubParts( false, 0, uiDepth );
1339 
1340   rpcTempCU->setPartSizeSubParts( eSize, 0, uiDepth );
1341   rpcTempCU->setPredModeSubParts( MODE_INTRA, 0, uiDepth );
1342   rpcTempCU->setChromaQpAdjSubParts( rpcTempCU->getCUTransquantBypass(0) ? 0 : m_ChromaQpAdjIdc, 0, uiDepth );
1343 
1344   Bool bSeparateLumaChroma = true; // choose estimation mode
1345 
1346   Distortion uiPreCalcDistC = 0;
1347   if (rpcBestCU->getPic()->getChromaFormat()==CHROMA_400)
1348   {
1349     bSeparateLumaChroma=true;
1350   }
1351 
1352   Pel resiLuma[NUMBER_OF_STORED_RESIDUAL_TYPES][MAX_CU_SIZE * MAX_CU_SIZE];
1353 
1354   if( !bSeparateLumaChroma )
1355   {
1356     // after this function, the direction will be PLANAR, DC, HOR or VER
1357     // however, if Luma ends up being one of those, the chroma dir must be later changed to DM_CHROMA.
1358     m_pcPredSearch->preestChromaPredMode( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth] );
1359   }
1360   m_pcPredSearch->estIntraPredQT( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], resiLuma, uiPreCalcDistC, bSeparateLumaChroma DEBUG_STRING_PASS_INTO(sTest) );
1361 
1362   m_ppcRecoYuvTemp[uiDepth]->copyToPicComponent(COMPONENT_Y, rpcTempCU->getPic()->getPicYuvRec(), rpcTempCU->getCtuRsAddr(), rpcTempCU->getZorderIdxInCtu() );
1363 
1364   if (rpcBestCU->getPic()->getChromaFormat()!=CHROMA_400)
1365   {
1366     m_pcPredSearch->estIntraPredChromaQT( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth], resiLuma, uiPreCalcDistC DEBUG_STRING_PASS_INTO(sTest) );
1367   }
1368 
1369   m_pcEntropyCoder->resetBits();
1370 
1371   if ( rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
1372   {
1373     m_pcEntropyCoder->encodeCUTransquantBypassFlag( rpcTempCU, 0,          true );
1374   }
1375 
1376   m_pcEntropyCoder->encodeSkipFlag ( rpcTempCU, 0,          true );
1377   m_pcEntropyCoder->encodePredMode( rpcTempCU, 0,          true );
1378   m_pcEntropyCoder->encodePartSize( rpcTempCU, 0, uiDepth, true );
1379   m_pcEntropyCoder->encodePredInfo( rpcTempCU, 0 );
1380   m_pcEntropyCoder->encodeIPCMInfo(rpcTempCU, 0, true );
1381 
1382   // Encode Coefficients
1383   Bool bCodeDQP = getdQPFlag();
1384   Bool codeChromaQpAdjFlag = getCodeChromaQpAdjFlag();
1385   m_pcEntropyCoder->encodeCoeff( rpcTempCU, 0, uiDepth, bCodeDQP, codeChromaQpAdjFlag );
1386   setCodeChromaQpAdjFlag( codeChromaQpAdjFlag );
1387   setdQPFlag( bCodeDQP );
1388 
1389   m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
1390 
1391   rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
1392   rpcTempCU->getTotalBins() = ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
1393   rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
1394 
1395   xCheckDQP( rpcTempCU );
1396 
1397   cost = rpcTempCU->getTotalCost();
1398 
1399   xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth DEBUG_STRING_PASS_INTO(sDebug) DEBUG_STRING_PASS_INTO(sTest));
1400 }
1401 
1402 
1403 /** Check R-D costs for a CU with PCM mode.
1404  * \param rpcBestCU pointer to best mode CU data structure
1405  * \param rpcTempCU pointer to testing mode CU data structure
1406  * \returns Void
1407  *
1408  * \note Current PCM implementation encodes sample values in a lossless way. The distortion of PCM mode CUs are zero. PCM mode is selected if the best mode yields bits greater than that of PCM mode.
1409  */
xCheckIntraPCM(TComDataCU * & rpcBestCU,TComDataCU * & rpcTempCU)1410 Void TEncCu::xCheckIntraPCM( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU )
1411 {
1412   UInt uiDepth = rpcTempCU->getDepth( 0 );
1413 
1414   rpcTempCU->setSkipFlagSubParts( false, 0, uiDepth );
1415 
1416   rpcTempCU->setIPCMFlag(0, true);
1417   rpcTempCU->setIPCMFlagSubParts (true, 0, rpcTempCU->getDepth(0));
1418   rpcTempCU->setPartSizeSubParts( SIZE_2Nx2N, 0, uiDepth );
1419   rpcTempCU->setPredModeSubParts( MODE_INTRA, 0, uiDepth );
1420   rpcTempCU->setTrIdxSubParts ( 0, 0, uiDepth );
1421   rpcTempCU->setChromaQpAdjSubParts( rpcTempCU->getCUTransquantBypass(0) ? 0 : m_ChromaQpAdjIdc, 0, uiDepth );
1422 
1423   m_pcPredSearch->IPCMSearch( rpcTempCU, m_ppcOrigYuv[uiDepth], m_ppcPredYuvTemp[uiDepth], m_ppcResiYuvTemp[uiDepth], m_ppcRecoYuvTemp[uiDepth]);
1424 
1425   m_pcRDGoOnSbacCoder->load(m_pppcRDSbacCoder[uiDepth][CI_CURR_BEST]);
1426 
1427   m_pcEntropyCoder->resetBits();
1428 
1429   if ( rpcTempCU->getSlice()->getPPS()->getTransquantBypassEnableFlag())
1430   {
1431     m_pcEntropyCoder->encodeCUTransquantBypassFlag( rpcTempCU, 0,          true );
1432   }
1433 
1434   m_pcEntropyCoder->encodeSkipFlag ( rpcTempCU, 0,          true );
1435   m_pcEntropyCoder->encodePredMode ( rpcTempCU, 0,          true );
1436   m_pcEntropyCoder->encodePartSize ( rpcTempCU, 0, uiDepth, true );
1437   m_pcEntropyCoder->encodeIPCMInfo ( rpcTempCU, 0, true );
1438 
1439   m_pcRDGoOnSbacCoder->store(m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]);
1440 
1441   rpcTempCU->getTotalBits() = m_pcEntropyCoder->getNumberOfWrittenBits();
1442   rpcTempCU->getTotalBins() = ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
1443   rpcTempCU->getTotalCost() = m_pcRdCost->calcRdCost( rpcTempCU->getTotalBits(), rpcTempCU->getTotalDistortion() );
1444 
1445   xCheckDQP( rpcTempCU );
1446   DEBUG_STRING_NEW(a)
1447   DEBUG_STRING_NEW(b)
1448   xCheckBestMode(rpcBestCU, rpcTempCU, uiDepth DEBUG_STRING_PASS_INTO(a) DEBUG_STRING_PASS_INTO(b));
1449 }
1450 
1451 /** check whether current try is the best with identifying the depth of current try
1452  * \param rpcBestCU
1453  * \param rpcTempCU
1454  * \returns Void
1455  */
xCheckBestMode(TComDataCU * & rpcBestCU,TComDataCU * & rpcTempCU,UInt uiDepth DEBUG_STRING_FN_DECLARE (sParent)DEBUG_STRING_FN_DECLARE (sTest)DEBUG_STRING_PASS_INTO (Bool bAddSizeInfo))1456 Void TEncCu::xCheckBestMode( TComDataCU*& rpcBestCU, TComDataCU*& rpcTempCU, UInt uiDepth DEBUG_STRING_FN_DECLARE(sParent) DEBUG_STRING_FN_DECLARE(sTest) DEBUG_STRING_PASS_INTO(Bool bAddSizeInfo) )
1457 {
1458   if( rpcTempCU->getTotalCost() < rpcBestCU->getTotalCost() )
1459   {
1460     TComYuv* pcYuv;
1461     // Change Information data
1462     TComDataCU* pcCU = rpcBestCU;
1463     rpcBestCU = rpcTempCU;
1464     rpcTempCU = pcCU;
1465 
1466     // Change Prediction data
1467     pcYuv = m_ppcPredYuvBest[uiDepth];
1468     m_ppcPredYuvBest[uiDepth] = m_ppcPredYuvTemp[uiDepth];
1469     m_ppcPredYuvTemp[uiDepth] = pcYuv;
1470 
1471     // Change Reconstruction data
1472     pcYuv = m_ppcRecoYuvBest[uiDepth];
1473     m_ppcRecoYuvBest[uiDepth] = m_ppcRecoYuvTemp[uiDepth];
1474     m_ppcRecoYuvTemp[uiDepth] = pcYuv;
1475 
1476     pcYuv = NULL;
1477     pcCU  = NULL;
1478 
1479     // store temp best CI for next CU coding
1480     m_pppcRDSbacCoder[uiDepth][CI_TEMP_BEST]->store(m_pppcRDSbacCoder[uiDepth][CI_NEXT_BEST]);
1481 
1482 
1483 #ifdef DEBUG_STRING
1484     DEBUG_STRING_SWAP(sParent, sTest)
1485     const PredMode predMode=rpcBestCU->getPredictionMode(0);
1486     if ((DebugOptionList::DebugString_Structure.getInt()&DebugStringGetPredModeMask(predMode)) && bAddSizeInfo)
1487     {
1488       std::stringstream ss(stringstream::out);
1489       ss <<"###: " << (predMode==MODE_INTRA?"Intra   ":"Inter   ") << partSizeToString[rpcBestCU->getPartitionSize(0)] << " CU at " << rpcBestCU->getCUPelX() << ", " << rpcBestCU->getCUPelY() << " width=" << UInt(rpcBestCU->getWidth(0)) << std::endl;
1490       sParent+=ss.str();
1491     }
1492 #endif
1493   }
1494 }
1495 
xCheckDQP(TComDataCU * pcCU)1496 Void TEncCu::xCheckDQP( TComDataCU* pcCU )
1497 {
1498   UInt uiDepth = pcCU->getDepth( 0 );
1499 
1500   if( pcCU->getSlice()->getPPS()->getUseDQP() && (g_uiMaxCUWidth>>uiDepth) >= pcCU->getSlice()->getPPS()->getMinCuDQPSize() )
1501   {
1502     if ( pcCU->getQtRootCbf( 0) )
1503     {
1504 #if !RDO_WITHOUT_DQP_BITS
1505       m_pcEntropyCoder->resetBits();
1506       m_pcEntropyCoder->encodeQP( pcCU, 0, false );
1507       pcCU->getTotalBits() += m_pcEntropyCoder->getNumberOfWrittenBits(); // dQP bits
1508       pcCU->getTotalBins() += ((TEncBinCABAC *)((TEncSbac*)m_pcEntropyCoder->m_pcEntropyCoderIf)->getEncBinIf())->getBinsCoded();
1509       pcCU->getTotalCost() = m_pcRdCost->calcRdCost( pcCU->getTotalBits(), pcCU->getTotalDistortion() );
1510 #endif
1511     }
1512     else
1513     {
1514       pcCU->setQPSubParts( pcCU->getRefQP( 0 ), 0, uiDepth ); // set QP to default QP
1515     }
1516   }
1517 }
1518 
xCopyAMVPInfo(AMVPInfo * pSrc,AMVPInfo * pDst)1519 Void TEncCu::xCopyAMVPInfo (AMVPInfo* pSrc, AMVPInfo* pDst)
1520 {
1521   pDst->iN = pSrc->iN;
1522   for (Int i = 0; i < pSrc->iN; i++)
1523   {
1524     pDst->m_acMvCand[i] = pSrc->m_acMvCand[i];
1525   }
1526 }
xCopyYuv2Pic(TComPic * rpcPic,UInt uiCUAddr,UInt uiAbsPartIdx,UInt uiDepth,UInt uiSrcDepth,TComDataCU * pcCU,UInt uiLPelX,UInt uiTPelY)1527 Void TEncCu::xCopyYuv2Pic(TComPic* rpcPic, UInt uiCUAddr, UInt uiAbsPartIdx, UInt uiDepth, UInt uiSrcDepth, TComDataCU* pcCU, UInt uiLPelX, UInt uiTPelY )
1528 {
1529   UInt uiAbsPartIdxInRaster = g_auiZscanToRaster[uiAbsPartIdx];
1530   UInt uiSrcBlkWidth = rpcPic->getNumPartInCtuWidth() >> (uiSrcDepth);
1531   UInt uiBlkWidth    = rpcPic->getNumPartInCtuWidth() >> (uiDepth);
1532   UInt uiPartIdxX = ( ( uiAbsPartIdxInRaster % rpcPic->getNumPartInCtuWidth() ) % uiSrcBlkWidth) / uiBlkWidth;
1533   UInt uiPartIdxY = ( ( uiAbsPartIdxInRaster / rpcPic->getNumPartInCtuWidth() ) % uiSrcBlkWidth) / uiBlkWidth;
1534   UInt uiPartIdx = uiPartIdxY * ( uiSrcBlkWidth / uiBlkWidth ) + uiPartIdxX;
1535   m_ppcRecoYuvBest[uiSrcDepth]->copyToPicYuv( rpcPic->getPicYuvRec (), uiCUAddr, uiAbsPartIdx, uiDepth - uiSrcDepth, uiPartIdx);
1536 
1537   m_ppcPredYuvBest[uiSrcDepth]->copyToPicYuv( rpcPic->getPicYuvPred (), uiCUAddr, uiAbsPartIdx, uiDepth - uiSrcDepth, uiPartIdx);
1538 }
1539 
xCopyYuv2Tmp(UInt uiPartUnitIdx,UInt uiNextDepth)1540 Void TEncCu::xCopyYuv2Tmp( UInt uiPartUnitIdx, UInt uiNextDepth )
1541 {
1542   UInt uiCurrDepth = uiNextDepth - 1;
1543   m_ppcRecoYuvBest[uiNextDepth]->copyToPartYuv( m_ppcRecoYuvTemp[uiCurrDepth], uiPartUnitIdx );
1544   m_ppcPredYuvBest[uiNextDepth]->copyToPartYuv( m_ppcPredYuvBest[uiCurrDepth], uiPartUnitIdx);
1545 }
1546 
1547 /** Function for filling the PCM buffer of a CU using its original sample array
1548  * \param pcCU pointer to current CU
1549  * \param pcOrgYuv pointer to original sample array
1550  * \returns Void
1551  */
xFillPCMBuffer(TComDataCU * pCU,TComYuv * pOrgYuv)1552 Void TEncCu::xFillPCMBuffer     ( TComDataCU* pCU, TComYuv* pOrgYuv )
1553 {
1554   const ChromaFormat format = pCU->getPic()->getChromaFormat();
1555   const UInt numberValidComponents = getNumberValidComponents(format);
1556   for (UInt componentIndex = 0; componentIndex < numberValidComponents; componentIndex++)
1557   {
1558     const ComponentID component = ComponentID(componentIndex);
1559 
1560     const UInt width  = pCU->getWidth(0)  >> getComponentScaleX(component, format);
1561     const UInt height = pCU->getHeight(0) >> getComponentScaleY(component, format);
1562 
1563     Pel *source      = pOrgYuv->getAddr(component, 0, width);
1564     Pel *destination = pCU->getPCMSample(component);
1565 
1566     const UInt sourceStride = pOrgYuv->getStride(component);
1567 
1568     for (Int line = 0; line < height; line++)
1569     {
1570       for (Int column = 0; column < width; column++)
1571       {
1572         destination[column] = source[column];
1573       }
1574 
1575       source      += sourceStride;
1576       destination += width;
1577     }
1578   }
1579 }
1580 
1581 #if ADAPTIVE_QP_SELECTION
1582 /** Collect ARL statistics from one block
1583   */
xTuCollectARLStats(TCoeff * rpcCoeff,TCoeff * rpcArlCoeff,Int NumCoeffInCU,Double * cSum,UInt * numSamples)1584 Int TEncCu::xTuCollectARLStats(TCoeff* rpcCoeff, TCoeff* rpcArlCoeff, Int NumCoeffInCU, Double* cSum, UInt* numSamples )
1585 {
1586   for( Int n = 0; n < NumCoeffInCU; n++ )
1587   {
1588     TCoeff u = abs( rpcCoeff[ n ] );
1589     TCoeff absc = rpcArlCoeff[ n ];
1590 
1591     if( u != 0 )
1592     {
1593       if( u < LEVEL_RANGE )
1594       {
1595         cSum[ u ] += ( Double )absc;
1596         numSamples[ u ]++;
1597       }
1598       else
1599       {
1600         cSum[ LEVEL_RANGE ] += ( Double )absc - ( Double )( u << ARL_C_PRECISION );
1601         numSamples[ LEVEL_RANGE ]++;
1602       }
1603     }
1604   }
1605 
1606   return 0;
1607 }
1608 
1609 /** Collect ARL statistics from one CTU
1610  * \param pcCU
1611  */
xCtuCollectARLStats(TComDataCU * pCtu)1612 Void TEncCu::xCtuCollectARLStats(TComDataCU* pCtu )
1613 {
1614   Double cSum[ LEVEL_RANGE + 1 ];     //: the sum of DCT coefficients corresponding to datatype and quantization output
1615   UInt numSamples[ LEVEL_RANGE + 1 ]; //: the number of coefficients corresponding to datatype and quantization output
1616 
1617   TCoeff* pCoeffY = pCtu->getCoeff(COMPONENT_Y);
1618   TCoeff* pArlCoeffY = pCtu->getArlCoeff(COMPONENT_Y);
1619 
1620   UInt uiMinCUWidth = g_uiMaxCUWidth >> g_uiMaxCUDepth;
1621   UInt uiMinNumCoeffInCU = 1 << uiMinCUWidth;
1622 
1623   memset( cSum, 0, sizeof( Double )*(LEVEL_RANGE+1) );
1624   memset( numSamples, 0, sizeof( UInt )*(LEVEL_RANGE+1) );
1625 
1626   // Collect stats to cSum[][] and numSamples[][]
1627   for(Int i = 0; i < pCtu->getTotalNumPart(); i ++ )
1628   {
1629     UInt uiTrIdx = pCtu->getTransformIdx(i);
1630 
1631     if(pCtu->isInter(i) && pCtu->getCbf( i, COMPONENT_Y, uiTrIdx ) )
1632     {
1633       xTuCollectARLStats(pCoeffY, pArlCoeffY, uiMinNumCoeffInCU, cSum, numSamples);
1634     }//Note that only InterY is processed. QP rounding is based on InterY data only.
1635 
1636     pCoeffY  += uiMinNumCoeffInCU;
1637     pArlCoeffY  += uiMinNumCoeffInCU;
1638   }
1639 
1640   for(Int u=1; u<LEVEL_RANGE;u++)
1641   {
1642     m_pcTrQuant->getSliceSumC()[u] += cSum[ u ] ;
1643     m_pcTrQuant->getSliceNSamples()[u] += numSamples[ u ] ;
1644   }
1645   m_pcTrQuant->getSliceSumC()[LEVEL_RANGE] += cSum[ LEVEL_RANGE ] ;
1646   m_pcTrQuant->getSliceNSamples()[LEVEL_RANGE] += numSamples[ LEVEL_RANGE ] ;
1647 }
1648 #endif
1649 //! \}
1650