1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2014, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  *  * Redistributions of source code must retain the above copyright notice,
13  *    this list of conditions and the following disclaimer.
14  *  * Redistributions in binary form must reproduce the above copyright notice,
15  *    this list of conditions and the following disclaimer in the documentation
16  *    and/or other materials provided with the distribution.
17  *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  *    be used to endorse or promote products derived from this software without
19  *    specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /** \file     TComTrQuant.cpp
35     \brief    transform and quantization class
36 */
37 
38 #include <stdlib.h>
39 #include <math.h>
40 #include <limits>
41 #include <memory.h>
42 #include "TComTrQuant.h"
43 #include "TComPic.h"
44 #include "ContextTables.h"
45 #include "TComTU.h"
46 #include "Debug.h"
47 
48 typedef struct
49 {
50   Int    iNNZbeforePos0;
51   Double d64CodedLevelandDist; // distortion and level cost only
52   Double d64UncodedDist;    // all zero coded block distortion
53   Double d64SigCost;
54   Double d64SigCost_0;
55 } coeffGroupRDStats;
56 
57 //! \ingroup TLibCommon
58 //! \{
59 
60 // ====================================================================================================================
61 // Constants
62 // ====================================================================================================================
63 
64 #define RDOQ_CHROMA                 1           ///< use of RDOQ in chroma
65 
66 
67 // ====================================================================================================================
68 // QpParam constructor
69 // ====================================================================================================================
70 
QpParam(const Int qpy,const ChannelType chType,const Int qpBdOffset,const Int chromaQPOffset,const ChromaFormat chFmt)71 QpParam::QpParam(const Int           qpy,
72                  const ChannelType   chType,
73                  const Int           qpBdOffset,
74                  const Int           chromaQPOffset,
75                  const ChromaFormat  chFmt )
76 {
77   Int baseQp;
78 
79   if(isLuma(chType))
80   {
81     baseQp = qpy + qpBdOffset;
82   }
83   else
84   {
85     baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
86 
87     if(baseQp < 0)
88     {
89       baseQp = baseQp + qpBdOffset;
90     }
91     else
92     {
93       baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
94     }
95   }
96 
97   Qp =baseQp;
98   per=baseQp/6;
99   rem=baseQp%6;
100 }
101 
QpParam(const TComDataCU & cu,const ComponentID compID)102 QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
103 {
104   Int chromaQpOffset = 0;
105 
106   if (isChroma(compID))
107   {
108     chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
109     chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
110 
111     chromaQpOffset += cu.getSlice()->getPPS()->getChromaQpAdjTableAt(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
112   }
113 
114   *this = QpParam(cu.getQP( 0 ),
115                   toChannelType(compID),
116                   cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
117                   chromaQpOffset,
118                   cu.getPic()->getChromaFormat());
119 }
120 
121 
122 // ====================================================================================================================
123 // TComTrQuant class member functions
124 // ====================================================================================================================
125 
TComTrQuant()126 TComTrQuant::TComTrQuant()
127 {
128   // allocate temporary buffers
129   m_plTempCoeff  = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
130 
131   // allocate bit estimation class  (for RDOQ)
132   m_pcEstBitsSbac = new estBitsSbacStruct;
133   initScalingList();
134 }
135 
~TComTrQuant()136 TComTrQuant::~TComTrQuant()
137 {
138   // delete temporary buffers
139   if ( m_plTempCoeff )
140   {
141     delete [] m_plTempCoeff;
142     m_plTempCoeff = NULL;
143   }
144 
145   // delete bit estimation class
146   if ( m_pcEstBitsSbac )
147   {
148     delete m_pcEstBitsSbac;
149   }
150   destroyScalingList();
151 }
152 
153 #if ADAPTIVE_QP_SELECTION
storeSliceQpNext(TComSlice * pcSlice)154 Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
155 {
156   // NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
157 
158   Int qpBase = pcSlice->getSliceQpBase();
159   Int sliceQpused = pcSlice->getSliceQp();
160   Int sliceQpnext;
161   Double alpha = qpBase < 17 ? 0.5 : 1;
162 
163   Int cnt=0;
164   for(Int u=1; u<=LEVEL_RANGE; u++)
165   {
166     cnt += m_sliceNsamples[u] ;
167   }
168 
169   if( !m_useRDOQ )
170   {
171     sliceQpused = qpBase;
172     alpha = 0.5;
173   }
174 
175   if( cnt > 120 )
176   {
177     Double sum = 0;
178     Int k = 0;
179     for(Int u=1; u<LEVEL_RANGE; u++)
180     {
181       sum += u*m_sliceSumC[u];
182       k += u*u*m_sliceNsamples[u];
183     }
184 
185     Int v;
186     Double q[MAX_QP+1] ;
187     for(v=0; v<=MAX_QP; v++)
188     {
189       q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
190     }
191 
192     Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
193 
194     for(v=0; v<MAX_QP; v++)
195     {
196       if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
197       {
198         break;
199       }
200     }
201     sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
202   }
203   else
204   {
205     sliceQpnext = sliceQpused;
206   }
207 
208   m_qpDelta[qpBase] = sliceQpnext - qpBase;
209 }
210 
initSliceQpDelta()211 Void TComTrQuant::initSliceQpDelta()
212 {
213   for(Int qp=0; qp<=MAX_QP; qp++)
214   {
215     m_qpDelta[qp] = qp < 17 ? 0 : 1;
216   }
217 }
218 
clearSliceARLCnt()219 Void TComTrQuant::clearSliceARLCnt()
220 {
221   memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
222   memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
223 }
224 #endif
225 
226 
227 
228 #if MATRIX_MULT
229 /** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
230  *  \param block pointer to input data (residual)
231  *  \param coeff pointer to output data (transform coefficients)
232  *  \param uiStride stride of input data
233  *  \param uiTrSize transform size (uiTrSize x uiTrSize)
234  *  \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
235  */
xTr(Int bitDepth,Pel * block,TCoeff * coeff,UInt uiStride,UInt uiTrSize,Bool useDST,const Int maxTrDynamicRange)236 Void xTr(Int bitDepth, Pel *block, TCoeff *coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
237 {
238   UInt i,j,k;
239   TCoeff iSum;
240   TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
241   const TMatrixCoeff *iT;
242   UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
243 
244   if (uiTrSize==4)
245   {
246     iT  = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
247   }
248   else if (uiTrSize==8)
249   {
250     iT = g_aiT8[TRANSFORM_FORWARD][0];
251   }
252   else if (uiTrSize==16)
253   {
254     iT = g_aiT16[TRANSFORM_FORWARD][0];
255   }
256   else if (uiTrSize==32)
257   {
258     iT = g_aiT32[TRANSFORM_FORWARD][0];
259   }
260   else
261   {
262     assert(0);
263   }
264 
265   static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
266 
267   const Int shift_1st = (uiLog2TrSize +  bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
268   const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
269   const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
270   const Int add_2nd = 1<<(shift_2nd-1);
271 
272   /* Horizontal transform */
273 
274   for (i=0; i<uiTrSize; i++)
275   {
276     for (j=0; j<uiTrSize; j++)
277     {
278       iSum = 0;
279       for (k=0; k<uiTrSize; k++)
280       {
281         iSum += iT[i*uiTrSize+k]*block[j*uiStride+k];
282       }
283       tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
284     }
285   }
286 
287   /* Vertical transform */
288   for (i=0; i<uiTrSize; i++)
289   {
290     for (j=0; j<uiTrSize; j++)
291     {
292       iSum = 0;
293       for (k=0; k<uiTrSize; k++)
294       {
295         iSum += iT[i*uiTrSize+k]*tmp[j*uiTrSize+k];
296       }
297       coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
298     }
299   }
300 }
301 
302 /** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
303  *  \param coeff pointer to input data (transform coefficients)
304  *  \param block pointer to output data (residual)
305  *  \param uiStride stride of output data
306  *  \param uiTrSize transform size (uiTrSize x uiTrSize)
307  *  \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
308  */
xITr(Int bitDepth,TCoeff * coeff,Pel * block,UInt uiStride,UInt uiTrSize,Bool useDST,const Int maxTrDynamicRange)309 Void xITr(Int bitDepth, TCoeff *coeff, Pel *block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
310 {
311   UInt i,j,k;
312   TCoeff iSum;
313   TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
314   const TMatrixCoeff *iT;
315 
316   if (uiTrSize==4)
317   {
318     iT  = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
319   }
320   else if (uiTrSize==8)
321   {
322     iT = g_aiT8[TRANSFORM_INVERSE][0];
323   }
324   else if (uiTrSize==16)
325   {
326     iT = g_aiT16[TRANSFORM_INVERSE][0];
327   }
328   else if (uiTrSize==32)
329   {
330     iT = g_aiT32[TRANSFORM_INVERSE][0];
331   }
332   else
333   {
334     assert(0);
335   }
336 
337   static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
338 
339   const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
340   const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
341   const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
342   const TCoeff clipMaximum =  (1 << maxTrDynamicRange) - 1;
343   assert(shift_2nd>=0);
344   const Int add_1st = 1<<(shift_1st-1);
345   const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
346 
347   /* Horizontal transform */
348   for (i=0; i<uiTrSize; i++)
349   {
350     for (j=0; j<uiTrSize; j++)
351     {
352       iSum = 0;
353       for (k=0; k<uiTrSize; k++)
354       {
355         iSum += iT[k*uiTrSize+i]*coeff[k*uiTrSize+j];
356       }
357 
358       // Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
359       tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
360     }
361   }
362 
363   /* Vertical transform */
364   for (i=0; i<uiTrSize; i++)
365   {
366     for (j=0; j<uiTrSize; j++)
367     {
368       iSum = 0;
369       for (k=0; k<uiTrSize; k++)
370       {
371         iSum += iT[k*uiTrSize+j]*tmp[i*uiTrSize+k];
372       }
373 
374       block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
375     }
376   }
377 }
378 
379 #endif //MATRIX_MULT
380 
381 
382 /** 4x4 forward transform implemented using partial butterfly structure (1D)
383  *  \param src   input data (residual)
384  *  \param dst   output data (transform coefficients)
385  *  \param shift specifies right shift after 1D transform
386  */
partialButterfly4(TCoeff * src,TCoeff * dst,Int shift,Int line)387 Void partialButterfly4(TCoeff *src, TCoeff *dst, Int shift, Int line)
388 {
389   Int j;
390   TCoeff E[2],O[2];
391   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
392 
393   for (j=0; j<line; j++)
394   {
395     /* E and O */
396     E[0] = src[0] + src[3];
397     O[0] = src[0] - src[3];
398     E[1] = src[1] + src[2];
399     O[1] = src[1] - src[2];
400 
401     dst[0]      = (g_aiT4[TRANSFORM_FORWARD][0][0]*E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]*E[1] + add)>>shift;
402     dst[2*line] = (g_aiT4[TRANSFORM_FORWARD][2][0]*E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
403     dst[line]   = (g_aiT4[TRANSFORM_FORWARD][1][0]*O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]*O[1] + add)>>shift;
404     dst[3*line] = (g_aiT4[TRANSFORM_FORWARD][3][0]*O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
405 
406     src += 4;
407     dst ++;
408   }
409 }
410 
411 // Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
412 // give identical results
fastForwardDst(TCoeff * block,TCoeff * coeff,Int shift)413 Void fastForwardDst(TCoeff *block, TCoeff *coeff, Int shift)  // input block, output coeff
414 {
415   Int i;
416   TCoeff c[4];
417   TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
418   for (i=0; i<4; i++)
419   {
420     // Intermediate Variables
421     c[0] = block[4*i+0];
422     c[1] = block[4*i+1];
423     c[2] = block[4*i+2];
424     c[3] = block[4*i+3];
425 
426     for (Int row = 0; row < 4; row++)
427     {
428       TCoeff result = 0;
429       for (Int column = 0; column < 4; column++)
430         result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
431 
432       coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
433     }
434   }
435 }
436 
fastInverseDst(TCoeff * tmp,TCoeff * block,Int shift,const TCoeff outputMinimum,const TCoeff outputMaximum)437 Void fastInverseDst(TCoeff *tmp, TCoeff *block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum)  // input tmp, output block
438 {
439   Int i;
440   TCoeff c[4];
441   TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
442   for (i=0; i<4; i++)
443   {
444     // Intermediate Variables
445     c[0] = tmp[   i];
446     c[1] = tmp[4 +i];
447     c[2] = tmp[8 +i];
448     c[3] = tmp[12+i];
449 
450     for (Int column = 0; column < 4; column++)
451     {
452       TCoeff &result = block[(i * 4) + column];
453 
454       result = 0;
455       for (Int row = 0; row < 4; row++)
456         result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
457 
458       result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
459     }
460   }
461 }
462 
463 /** 4x4 inverse transform implemented using partial butterfly structure (1D)
464  *  \param src   input data (transform coefficients)
465  *  \param dst   output data (residual)
466  *  \param shift specifies right shift after 1D transform
467  */
partialButterflyInverse4(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)468 Void partialButterflyInverse4(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
469 {
470   Int j;
471   TCoeff E[2],O[2];
472   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
473 
474   for (j=0; j<line; j++)
475   {
476     /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
477     O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]*src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]*src[3*line];
478     O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]*src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]*src[3*line];
479     E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]*src[0]    + g_aiT4[TRANSFORM_INVERSE][2][0]*src[2*line];
480     E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]*src[0]    + g_aiT4[TRANSFORM_INVERSE][2][1]*src[2*line];
481 
482     /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
483     dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
484     dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
485     dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
486     dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
487 
488     src   ++;
489     dst += 4;
490   }
491 }
492 
493 /** 8x8 forward transform implemented using partial butterfly structure (1D)
494  *  \param src   input data (residual)
495  *  \param dst   output data (transform coefficients)
496  *  \param shift specifies right shift after 1D transform
497  */
partialButterfly8(TCoeff * src,TCoeff * dst,Int shift,Int line)498 Void partialButterfly8(TCoeff *src, TCoeff *dst, Int shift, Int line)
499 {
500   Int j,k;
501   TCoeff E[4],O[4];
502   TCoeff EE[2],EO[2];
503   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
504 
505   for (j=0; j<line; j++)
506   {
507     /* E and O*/
508     for (k=0;k<4;k++)
509     {
510       E[k] = src[k] + src[7-k];
511       O[k] = src[k] - src[7-k];
512     }
513     /* EE and EO */
514     EE[0] = E[0] + E[3];
515     EO[0] = E[0] - E[3];
516     EE[1] = E[1] + E[2];
517     EO[1] = E[1] - E[2];
518 
519     dst[0]      = (g_aiT8[TRANSFORM_FORWARD][0][0]*EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]*EE[1] + add)>>shift;
520     dst[4*line] = (g_aiT8[TRANSFORM_FORWARD][4][0]*EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
521     dst[2*line] = (g_aiT8[TRANSFORM_FORWARD][2][0]*EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
522     dst[6*line] = (g_aiT8[TRANSFORM_FORWARD][6][0]*EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
523 
524     dst[line]   = (g_aiT8[TRANSFORM_FORWARD][1][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]*O[3] + add)>>shift;
525     dst[3*line] = (g_aiT8[TRANSFORM_FORWARD][3][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
526     dst[5*line] = (g_aiT8[TRANSFORM_FORWARD][5][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
527     dst[7*line] = (g_aiT8[TRANSFORM_FORWARD][7][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
528 
529     src += 8;
530     dst ++;
531   }
532 }
533 
534 /** 8x8 inverse transform implemented using partial butterfly structure (1D)
535  *  \param src   input data (transform coefficients)
536  *  \param dst   output data (residual)
537  *  \param shift specifies right shift after 1D transform
538  */
partialButterflyInverse8(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)539 Void partialButterflyInverse8(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
540 {
541   Int j,k;
542   TCoeff E[4],O[4];
543   TCoeff EE[2],EO[2];
544   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
545 
546   for (j=0; j<line; j++)
547   {
548     /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
549     for (k=0;k<4;k++)
550     {
551       O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]*src[line]   + g_aiT8[TRANSFORM_INVERSE][ 3][k]*src[3*line] +
552              g_aiT8[TRANSFORM_INVERSE][ 5][k]*src[5*line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]*src[7*line];
553     }
554 
555     EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]*src[ 2*line ] + g_aiT8[TRANSFORM_INVERSE][6][0]*src[ 6*line ];
556     EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]*src[ 2*line ] + g_aiT8[TRANSFORM_INVERSE][6][1]*src[ 6*line ];
557     EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]*src[ 0      ] + g_aiT8[TRANSFORM_INVERSE][4][0]*src[ 4*line ];
558     EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]*src[ 0      ] + g_aiT8[TRANSFORM_INVERSE][4][1]*src[ 4*line ];
559 
560     /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
561     E[0] = EE[0] + EO[0];
562     E[3] = EE[0] - EO[0];
563     E[1] = EE[1] + EO[1];
564     E[2] = EE[1] - EO[1];
565     for (k=0;k<4;k++)
566     {
567       dst[ k   ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
568       dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
569     }
570     src ++;
571     dst += 8;
572   }
573 }
574 
575 /** 16x16 forward transform implemented using partial butterfly structure (1D)
576  *  \param src   input data (residual)
577  *  \param dst   output data (transform coefficients)
578  *  \param shift specifies right shift after 1D transform
579  */
partialButterfly16(TCoeff * src,TCoeff * dst,Int shift,Int line)580 Void partialButterfly16(TCoeff *src, TCoeff *dst, Int shift, Int line)
581 {
582   Int j,k;
583   TCoeff E[8],O[8];
584   TCoeff EE[4],EO[4];
585   TCoeff EEE[2],EEO[2];
586   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
587 
588   for (j=0; j<line; j++)
589   {
590     /* E and O*/
591     for (k=0;k<8;k++)
592     {
593       E[k] = src[k] + src[15-k];
594       O[k] = src[k] - src[15-k];
595     }
596     /* EE and EO */
597     for (k=0;k<4;k++)
598     {
599       EE[k] = E[k] + E[7-k];
600       EO[k] = E[k] - E[7-k];
601     }
602     /* EEE and EEO */
603     EEE[0] = EE[0] + EE[3];
604     EEO[0] = EE[0] - EE[3];
605     EEE[1] = EE[1] + EE[2];
606     EEO[1] = EE[1] - EE[2];
607 
608     dst[ 0      ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]*EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]*EEE[1] + add)>>shift;
609     dst[ 8*line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]*EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
610     dst[ 4*line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]*EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
611     dst[ 12*line] = (g_aiT16[TRANSFORM_FORWARD][12][0]*EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
612 
613     for (k=2;k<16;k+=4)
614     {
615       dst[ k*line ] = (g_aiT16[TRANSFORM_FORWARD][k][0]*EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
616                        g_aiT16[TRANSFORM_FORWARD][k][2]*EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]*EO[3] + add)>>shift;
617     }
618 
619     for (k=1;k<16;k+=2)
620     {
621       dst[ k*line ] = (g_aiT16[TRANSFORM_FORWARD][k][0]*O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
622                        g_aiT16[TRANSFORM_FORWARD][k][2]*O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]*O[3] +
623                        g_aiT16[TRANSFORM_FORWARD][k][4]*O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]*O[5] +
624                        g_aiT16[TRANSFORM_FORWARD][k][6]*O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]*O[7] + add)>>shift;
625     }
626 
627     src += 16;
628     dst ++;
629 
630   }
631 }
632 
633 /** 16x16 inverse transform implemented using partial butterfly structure (1D)
634  *  \param src   input data (transform coefficients)
635  *  \param dst   output data (residual)
636  *  \param shift specifies right shift after 1D transform
637  */
partialButterflyInverse16(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)638 Void partialButterflyInverse16(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
639 {
640   Int j,k;
641   TCoeff E[8],O[8];
642   TCoeff EE[4],EO[4];
643   TCoeff EEE[2],EEO[2];
644   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
645 
646   for (j=0; j<line; j++)
647   {
648     /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
649     for (k=0;k<8;k++)
650     {
651       O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]*src[ line]   + g_aiT16[TRANSFORM_INVERSE][ 3][k]*src[ 3*line] +
652              g_aiT16[TRANSFORM_INVERSE][ 5][k]*src[ 5*line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]*src[ 7*line] +
653              g_aiT16[TRANSFORM_INVERSE][ 9][k]*src[ 9*line] + g_aiT16[TRANSFORM_INVERSE][11][k]*src[11*line] +
654              g_aiT16[TRANSFORM_INVERSE][13][k]*src[13*line] + g_aiT16[TRANSFORM_INVERSE][15][k]*src[15*line];
655     }
656     for (k=0;k<4;k++)
657     {
658       EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]*src[ 2*line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]*src[ 6*line] +
659               g_aiT16[TRANSFORM_INVERSE][10][k]*src[10*line] + g_aiT16[TRANSFORM_INVERSE][14][k]*src[14*line];
660     }
661     EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]*src[ 4*line ] + g_aiT16[TRANSFORM_INVERSE][12][0]*src[ 12*line ];
662     EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]*src[ 0      ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]*src[ 8*line  ];
663     EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]*src[ 4*line ] + g_aiT16[TRANSFORM_INVERSE][12][1]*src[ 12*line ];
664     EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]*src[ 0      ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]*src[ 8*line  ];
665 
666     /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
667     for (k=0;k<2;k++)
668     {
669       EE[k] = EEE[k] + EEO[k];
670       EE[k+2] = EEE[1-k] - EEO[1-k];
671     }
672     for (k=0;k<4;k++)
673     {
674       E[k] = EE[k] + EO[k];
675       E[k+4] = EE[3-k] - EO[3-k];
676     }
677     for (k=0;k<8;k++)
678     {
679       dst[k]   = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
680       dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
681     }
682     src ++;
683     dst += 16;
684   }
685 }
686 
687 /** 32x32 forward transform implemented using partial butterfly structure (1D)
688  *  \param src   input data (residual)
689  *  \param dst   output data (transform coefficients)
690  *  \param shift specifies right shift after 1D transform
691  */
partialButterfly32(TCoeff * src,TCoeff * dst,Int shift,Int line)692 Void partialButterfly32(TCoeff *src, TCoeff *dst, Int shift, Int line)
693 {
694   Int j,k;
695   TCoeff E[16],O[16];
696   TCoeff EE[8],EO[8];
697   TCoeff EEE[4],EEO[4];
698   TCoeff EEEE[2],EEEO[2];
699   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
700 
701   for (j=0; j<line; j++)
702   {
703     /* E and O*/
704     for (k=0;k<16;k++)
705     {
706       E[k] = src[k] + src[31-k];
707       O[k] = src[k] - src[31-k];
708     }
709     /* EE and EO */
710     for (k=0;k<8;k++)
711     {
712       EE[k] = E[k] + E[15-k];
713       EO[k] = E[k] - E[15-k];
714     }
715     /* EEE and EEO */
716     for (k=0;k<4;k++)
717     {
718       EEE[k] = EE[k] + EE[7-k];
719       EEO[k] = EE[k] - EE[7-k];
720     }
721     /* EEEE and EEEO */
722     EEEE[0] = EEE[0] + EEE[3];
723     EEEO[0] = EEE[0] - EEE[3];
724     EEEE[1] = EEE[1] + EEE[2];
725     EEEO[1] = EEE[1] - EEE[2];
726 
727     dst[ 0       ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]*EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]*EEEE[1] + add)>>shift;
728     dst[ 16*line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]*EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
729     dst[ 8*line  ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]*EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
730     dst[ 24*line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]*EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
731     for (k=4;k<32;k+=8)
732     {
733       dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][0]*EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
734                        g_aiT32[TRANSFORM_FORWARD][k][2]*EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]*EEO[3] + add)>>shift;
735     }
736     for (k=2;k<32;k+=4)
737     {
738       dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][0]*EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
739                        g_aiT32[TRANSFORM_FORWARD][k][2]*EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]*EO[3] +
740                        g_aiT32[TRANSFORM_FORWARD][k][4]*EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]*EO[5] +
741                        g_aiT32[TRANSFORM_FORWARD][k][6]*EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]*EO[7] + add)>>shift;
742     }
743     for (k=1;k<32;k+=2)
744     {
745       dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]*O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
746                        g_aiT32[TRANSFORM_FORWARD][k][ 2]*O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]*O[ 3] +
747                        g_aiT32[TRANSFORM_FORWARD][k][ 4]*O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]*O[ 5] +
748                        g_aiT32[TRANSFORM_FORWARD][k][ 6]*O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]*O[ 7] +
749                        g_aiT32[TRANSFORM_FORWARD][k][ 8]*O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]*O[ 9] +
750                        g_aiT32[TRANSFORM_FORWARD][k][10]*O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]*O[11] +
751                        g_aiT32[TRANSFORM_FORWARD][k][12]*O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]*O[13] +
752                        g_aiT32[TRANSFORM_FORWARD][k][14]*O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]*O[15] + add)>>shift;
753     }
754 
755     src += 32;
756     dst ++;
757   }
758 }
759 
760 /** 32x32 inverse transform implemented using partial butterfly structure (1D)
761  *  \param src   input data (transform coefficients)
762  *  \param dst   output data (residual)
763  *  \param shift specifies right shift after 1D transform
764  */
partialButterflyInverse32(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)765 Void partialButterflyInverse32(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
766 {
767   Int j,k;
768   TCoeff E[16],O[16];
769   TCoeff EE[8],EO[8];
770   TCoeff EEE[4],EEO[4];
771   TCoeff EEEE[2],EEEO[2];
772   TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
773 
774   for (j=0; j<line; j++)
775   {
776     /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
777     for (k=0;k<16;k++)
778     {
779       O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]*src[ line    ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]*src[ 3*line  ] +
780              g_aiT32[TRANSFORM_INVERSE][ 5][k]*src[ 5*line  ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]*src[ 7*line  ] +
781              g_aiT32[TRANSFORM_INVERSE][ 9][k]*src[ 9*line  ] + g_aiT32[TRANSFORM_INVERSE][11][k]*src[ 11*line ] +
782              g_aiT32[TRANSFORM_INVERSE][13][k]*src[ 13*line ] + g_aiT32[TRANSFORM_INVERSE][15][k]*src[ 15*line ] +
783              g_aiT32[TRANSFORM_INVERSE][17][k]*src[ 17*line ] + g_aiT32[TRANSFORM_INVERSE][19][k]*src[ 19*line ] +
784              g_aiT32[TRANSFORM_INVERSE][21][k]*src[ 21*line ] + g_aiT32[TRANSFORM_INVERSE][23][k]*src[ 23*line ] +
785              g_aiT32[TRANSFORM_INVERSE][25][k]*src[ 25*line ] + g_aiT32[TRANSFORM_INVERSE][27][k]*src[ 27*line ] +
786              g_aiT32[TRANSFORM_INVERSE][29][k]*src[ 29*line ] + g_aiT32[TRANSFORM_INVERSE][31][k]*src[ 31*line ];
787     }
788     for (k=0;k<8;k++)
789     {
790       EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]*src[ 2*line  ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]*src[ 6*line  ] +
791               g_aiT32[TRANSFORM_INVERSE][10][k]*src[ 10*line ] + g_aiT32[TRANSFORM_INVERSE][14][k]*src[ 14*line ] +
792               g_aiT32[TRANSFORM_INVERSE][18][k]*src[ 18*line ] + g_aiT32[TRANSFORM_INVERSE][22][k]*src[ 22*line ] +
793               g_aiT32[TRANSFORM_INVERSE][26][k]*src[ 26*line ] + g_aiT32[TRANSFORM_INVERSE][30][k]*src[ 30*line ];
794     }
795     for (k=0;k<4;k++)
796     {
797       EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]*src[  4*line ] + g_aiT32[TRANSFORM_INVERSE][12][k]*src[ 12*line ] +
798                g_aiT32[TRANSFORM_INVERSE][20][k]*src[ 20*line ] + g_aiT32[TRANSFORM_INVERSE][28][k]*src[ 28*line ];
799     }
800     EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]*src[ 8*line ] + g_aiT32[TRANSFORM_INVERSE][24][0]*src[ 24*line ];
801     EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]*src[ 8*line ] + g_aiT32[TRANSFORM_INVERSE][24][1]*src[ 24*line ];
802     EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]*src[ 0      ] + g_aiT32[TRANSFORM_INVERSE][16][0]*src[ 16*line ];
803     EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]*src[ 0      ] + g_aiT32[TRANSFORM_INVERSE][16][1]*src[ 16*line ];
804 
805     /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
806     EEE[0] = EEEE[0] + EEEO[0];
807     EEE[3] = EEEE[0] - EEEO[0];
808     EEE[1] = EEEE[1] + EEEO[1];
809     EEE[2] = EEEE[1] - EEEO[1];
810     for (k=0;k<4;k++)
811     {
812       EE[k] = EEE[k] + EEO[k];
813       EE[k+4] = EEE[3-k] - EEO[3-k];
814     }
815     for (k=0;k<8;k++)
816     {
817       E[k] = EE[k] + EO[k];
818       E[k+8] = EE[7-k] - EO[7-k];
819     }
820     for (k=0;k<16;k++)
821     {
822       dst[k]    = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
823       dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
824     }
825     src ++;
826     dst += 32;
827   }
828 }
829 
830 /** MxN forward transform (2D)
831 *  \param block input data (residual)
832 *  \param coeff output data (transform coefficients)
833 *  \param iWidth input data (width of transform)
834 *  \param iHeight input data (height of transform)
835 */
xTrMxN(Int bitDepth,TCoeff * block,TCoeff * coeff,Int iWidth,Int iHeight,Bool useDST,const Int maxTrDynamicRange)836 Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
837 {
838   static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
839 
840   const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) +  bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
841   const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
842 
843   assert(shift_1st >= 0);
844   assert(shift_2nd >= 0);
845 
846   TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
847 
848   switch (iWidth)
849   {
850     case 4:
851       {
852         if ((iHeight == 4) && useDST)    // Check for DCT or DST
853         {
854            fastForwardDst( block, tmp, shift_1st );
855         }
856         else partialButterfly4 ( block, tmp, shift_1st, iHeight );
857       }
858       break;
859 
860     case 8:     partialButterfly8 ( block, tmp, shift_1st, iHeight );  break;
861     case 16:    partialButterfly16( block, tmp, shift_1st, iHeight );  break;
862     case 32:    partialButterfly32( block, tmp, shift_1st, iHeight );  break;
863     default:
864       assert(0); exit (1); break;
865   }
866 
867   switch (iHeight)
868   {
869     case 4:
870       {
871         if ((iWidth == 4) && useDST)    // Check for DCT or DST
872         {
873           fastForwardDst( tmp, coeff, shift_2nd );
874         }
875         else partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
876       }
877       break;
878 
879     case 8:     partialButterfly8 ( tmp, coeff, shift_2nd, iWidth );    break;
880     case 16:    partialButterfly16( tmp, coeff, shift_2nd, iWidth );    break;
881     case 32:    partialButterfly32( tmp, coeff, shift_2nd, iWidth );    break;
882     default:
883       assert(0); exit (1); break;
884   }
885 }
886 
887 
888 /** MxN inverse transform (2D)
889 *  \param coeff input data (transform coefficients)
890 *  \param block output data (residual)
891 *  \param iWidth input data (width of transform)
892 *  \param iHeight input data (height of transform)
893 */
xITrMxN(Int bitDepth,TCoeff * coeff,TCoeff * block,Int iWidth,Int iHeight,Bool useDST,const Int maxTrDynamicRange)894 Void xITrMxN(Int bitDepth, TCoeff *coeff, TCoeff *block, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
895 {
896   static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
897 
898   Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
899   Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
900   const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
901   const TCoeff clipMaximum =  (1 << maxTrDynamicRange) - 1;
902 
903   assert(shift_1st >= 0);
904   assert(shift_2nd >= 0);
905 
906   TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
907 
908   switch (iHeight)
909   {
910     case 4:
911       {
912         if ((iWidth == 4) && useDST)    // Check for DCT or DST
913         {
914           fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
915         }
916         else partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
917       }
918       break;
919 
920     case  8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
921     case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
922     case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
923 
924     default:
925       assert(0); exit (1); break;
926   }
927 
928   switch (iWidth)
929   {
930     // Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
931     case 4:
932       {
933         if ((iHeight == 4) && useDST)    // Check for DCT or DST
934         {
935           fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
936         }
937         else partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
938       }
939       break;
940 
941     case  8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
942     case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
943     case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
944 
945     default:
946       assert(0); exit (1); break;
947   }
948 }
949 
950 
951 // To minimize the distortion only. No rate is considered.
signBitHidingHDQ(const ComponentID compID,TCoeff * pQCoef,TCoeff * pCoef,TCoeff * deltaU,const TUEntropyCodingParameters & codingParameters)952 Void TComTrQuant::signBitHidingHDQ( const ComponentID compID, TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters )
953 {
954   const UInt width     = codingParameters.widthInGroups  << MLS_CG_LOG2_WIDTH;
955   const UInt height    = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
956   const UInt groupSize = 1 << MLS_CG_SIZE;
957 
958   const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
959   const TCoeff entropyCodingMaximum =  (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
960 
961   Int lastCG = -1;
962   Int absSum = 0 ;
963   Int n ;
964 
965   for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
966   {
967     Int  subPos = subSet << MLS_CG_SIZE;
968     Int  firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
969     absSum = 0 ;
970 
971     for(n = groupSize-1; n >= 0; --n )
972     {
973       if( pQCoef[ codingParameters.scan[ n + subPos ]] )
974       {
975         lastNZPosInCG = n;
976         break;
977       }
978     }
979 
980     for(n = 0; n <groupSize; n++ )
981     {
982       if( pQCoef[ codingParameters.scan[ n + subPos ]] )
983       {
984         firstNZPosInCG = n;
985         break;
986       }
987     }
988 
989     for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
990     {
991       absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
992     }
993 
994     if(lastNZPosInCG>=0 && lastCG==-1)
995     {
996       lastCG = 1 ;
997     }
998 
999     if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1000     {
1001       UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
1002       if( signbit!=(absSum&0x1) )  //compare signbit with sum_parity
1003       {
1004         TCoeff curCost    = std::numeric_limits<TCoeff>::max();
1005         TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
1006         Int minPos =-1, finalChange=0, curChange=0;
1007 
1008         for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
1009         {
1010           UInt blkPos   = codingParameters.scan[ n+subPos ];
1011           if(pQCoef[ blkPos ] != 0 )
1012           {
1013             if(deltaU[blkPos]>0)
1014             {
1015               curCost = - deltaU[blkPos];
1016               curChange=1 ;
1017             }
1018             else
1019             {
1020               //curChange =-1;
1021               if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1022               {
1023                 curCost = std::numeric_limits<TCoeff>::max();
1024               }
1025               else
1026               {
1027                 curCost = deltaU[blkPos];
1028                 curChange =-1;
1029               }
1030             }
1031           }
1032           else
1033           {
1034             if(n<firstNZPosInCG)
1035             {
1036               UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1037               if(thisSignBit != signbit )
1038               {
1039                 curCost = std::numeric_limits<TCoeff>::max();
1040               }
1041               else
1042               {
1043                 curCost = - (deltaU[blkPos])  ;
1044                 curChange = 1 ;
1045               }
1046             }
1047             else
1048             {
1049               curCost = - (deltaU[blkPos])  ;
1050               curChange = 1 ;
1051             }
1052           }
1053 
1054           if( curCost<minCostInc)
1055           {
1056             minCostInc = curCost ;
1057             finalChange = curChange ;
1058             minPos = blkPos ;
1059           }
1060         } //CG loop
1061 
1062         if(pQCoef[minPos] == entropyCodingMaximum || pQCoef[minPos] == entropyCodingMinimum)
1063         {
1064           finalChange = -1;
1065         }
1066 
1067         if(pCoef[minPos]>=0)
1068         {
1069           pQCoef[minPos] += finalChange ;
1070         }
1071         else
1072         {
1073           pQCoef[minPos] -= finalChange ;
1074         }
1075       } // Hide
1076     }
1077     if(lastCG==1)
1078     {
1079       lastCG=0 ;
1080     }
1081   } // TU loop
1082 
1083   return;
1084 }
1085 
1086 
xQuant(TComTU & rTu,TCoeff * pSrc,TCoeff * pDes,TCoeff * pArlDes,TCoeff & uiAbsSum,const ComponentID compID,const QpParam & cQP)1087 Void TComTrQuant::xQuant(       TComTU       &rTu,
1088                                 TCoeff      * pSrc,
1089                                 TCoeff      * pDes,
1090 #if ADAPTIVE_QP_SELECTION
1091                                 TCoeff      *pArlDes,
1092 #endif
1093                                 TCoeff       &uiAbsSum,
1094                           const ComponentID   compID,
1095                           const QpParam      &cQP )
1096 {
1097   const TComRectangle &rect = rTu.getRect(compID);
1098   const UInt uiWidth        = rect.width;
1099   const UInt uiHeight       = rect.height;
1100   TComDataCU* pcCU          = rTu.getCU();
1101   const UInt uiAbsPartIdx   = rTu.GetAbsPartIdxTU();
1102 
1103   TCoeff* piCoef    = pSrc;
1104   TCoeff* piQCoef   = pDes;
1105 #if ADAPTIVE_QP_SELECTION
1106   TCoeff* piArlCCoef = pArlDes;
1107 #endif
1108 
1109   const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1110 
1111   Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
1112   if ( useRDOQ && (isLuma(compID) || RDOQ_CHROMA) )
1113   {
1114 #if ADAPTIVE_QP_SELECTION
1115     xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
1116 #else
1117     xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
1118 #endif
1119   }
1120   else
1121   {
1122     TUEntropyCodingParameters codingParameters;
1123     getTUEntropyCodingParameters(codingParameters, rTu, compID);
1124 
1125     const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1126     const TCoeff entropyCodingMaximum =  (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1127 
1128     TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
1129 
1130     const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1131 
1132     Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1133     assert(scalingListType < SCALING_LIST_NUM);
1134     Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1135 
1136     const Bool enableScalingLists             = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1137     const Int  defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1138 
1139     /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1140      * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1141      * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1142      * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1143      */
1144 
1145     // Represents scaling through forward transform
1146     Int iTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1147     if (useTransformSkip && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
1148     {
1149       iTransformShift = std::max<Int>(0, iTransformShift);
1150     }
1151 
1152     const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1153     // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1154 
1155 #if ADAPTIVE_QP_SELECTION
1156     Int iQBitsC = MAX_INT;
1157     Int iAddC   = MAX_INT;
1158 
1159     if (m_bUseAdaptQpSelect)
1160     {
1161       iQBitsC = iQBits - ARL_C_PRECISION;
1162       iAddC   = 1 << (iQBitsC-1);
1163     }
1164 #endif
1165 
1166     const Int iAdd   = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1167     const Int qBits8 = iQBits - 8;
1168 
1169     for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1170     {
1171       const TCoeff iLevel   = piCoef[uiBlockPos];
1172       const TCoeff iSign    = (iLevel < 0 ? -1: 1);
1173 
1174       const Int64  tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1175 
1176 #if ADAPTIVE_QP_SELECTION
1177       if( m_bUseAdaptQpSelect )
1178       {
1179         piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
1180       }
1181 #endif
1182 
1183       const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1184       deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
1185 
1186       uiAbsSum += quantisedMagnitude;
1187       const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
1188 
1189       piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
1190     } // for n
1191 
1192     if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1193     {
1194       if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
1195       {
1196         signBitHidingHDQ( compID, piQCoef, piCoef, deltaU, codingParameters ) ;
1197       }
1198     }
1199   } //if RDOQ
1200   //return;
1201 }
1202 
xDeQuant(TComTU & rTu,const TCoeff * pSrc,TCoeff * pDes,const ComponentID compID,const QpParam & cQP)1203 Void TComTrQuant::xDeQuant(       TComTU        &rTu,
1204                             const TCoeff       * pSrc,
1205                                   TCoeff       * pDes,
1206                             const ComponentID    compID,
1207                             const QpParam       &cQP )
1208 {
1209   assert(compID<MAX_NUM_COMPONENT);
1210 
1211         TComDataCU          *pcCU               = rTu.getCU();
1212   const UInt                 uiAbsPartIdx       = rTu.GetAbsPartIdxTU();
1213   const TComRectangle       &rect               = rTu.getRect(compID);
1214   const UInt                 uiWidth            = rect.width;
1215   const UInt                 uiHeight           = rect.height;
1216   const TCoeff        *const piQCoef            = pSrc;
1217         TCoeff        *const piCoef             = pDes;
1218   const UInt                 uiLog2TrSize       = rTu.GetEquivalentLog2TrSize(compID);
1219   const UInt                 numSamplesInBlock  = uiWidth*uiHeight;
1220   const TCoeff               transformMinimum   = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1221   const TCoeff               transformMaximum   =  (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1222   const Bool                 enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1223   const Int                  scalingListType    = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1224 
1225   assert (scalingListType < SCALING_LIST_NUM);
1226   assert ( uiWidth <= m_uiMaxTrSize );
1227 
1228   // Represents scaling through forward transform
1229   const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1230   const Int  originalTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1231   const Int  iTransformShift        = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
1232 
1233   const Int QP_per = cQP.per;
1234   const Int QP_rem = cQP.rem;
1235 
1236   const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
1237 
1238   if(enableScalingLists)
1239   {
1240     //from the dequantisation equation:
1241     //iCoeffQ                         = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
1242     //(sizeof(Intermediate_Int) * 8)  =              inputBitDepth    +    dequantCoefBits                   - rightShift
1243     const UInt             dequantCoefBits     = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
1244     const UInt             targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
1245 
1246     const Intermediate_Int inputMinimum        = -(1 << (targetInputBitDepth - 1));
1247     const Intermediate_Int inputMaximum        =  (1 << (targetInputBitDepth - 1)) - 1;
1248 
1249     Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
1250 
1251     if(rightShift > 0)
1252     {
1253       const Intermediate_Int iAdd = 1 << (rightShift - 1);
1254 
1255       for( Int n = 0; n < numSamplesInBlock; n++ )
1256       {
1257         const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1258         const Intermediate_Int iCoeffQ   = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
1259 
1260         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1261       }
1262     }
1263     else
1264     {
1265       const Int leftShift = -rightShift;
1266 
1267       for( Int n = 0; n < numSamplesInBlock; n++ )
1268       {
1269         const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1270         const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
1271 
1272         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1273       }
1274     }
1275   }
1276   else
1277   {
1278     const Int scale     =  g_invQuantScales[QP_rem];
1279     const Int scaleBits =     (IQUANT_SHIFT + 1)   ;
1280 
1281     //from the dequantisation equation:
1282     //iCoeffQ                         = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
1283     //(sizeof(Intermediate_Int) * 8)  =                    inputBitDepth   + scaleBits      - rightShift
1284     const UInt             targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
1285     const Intermediate_Int inputMinimum        = -(1 << (targetInputBitDepth - 1));
1286     const Intermediate_Int inputMaximum        =  (1 << (targetInputBitDepth - 1)) - 1;
1287 
1288     if (rightShift > 0)
1289     {
1290       const Intermediate_Int iAdd = 1 << (rightShift - 1);
1291 
1292       for( Int n = 0; n < numSamplesInBlock; n++ )
1293       {
1294         const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1295         const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
1296 
1297         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1298       }
1299     }
1300     else
1301     {
1302       const Int leftShift = -rightShift;
1303 
1304       for( Int n = 0; n < numSamplesInBlock; n++ )
1305       {
1306         const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1307         const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * scale) << leftShift;
1308 
1309         piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1310       }
1311     }
1312   }
1313 }
1314 
1315 
init(UInt uiMaxTrSize,Bool bUseRDOQ,Bool bUseRDOQTS,Bool bEnc,Bool useTransformSkipFast,Bool bUseAdaptQpSelect)1316 Void TComTrQuant::init(   UInt  uiMaxTrSize,
1317                           Bool  bUseRDOQ,
1318                           Bool  bUseRDOQTS,
1319                           Bool  bEnc,
1320                           Bool  useTransformSkipFast
1321 #if ADAPTIVE_QP_SELECTION
1322                         , Bool bUseAdaptQpSelect
1323 #endif
1324                        )
1325 {
1326   m_uiMaxTrSize  = uiMaxTrSize;
1327   m_bEnc         = bEnc;
1328   m_useRDOQ      = bUseRDOQ;
1329   m_useRDOQTS    = bUseRDOQTS;
1330 #if ADAPTIVE_QP_SELECTION
1331   m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1332 #endif
1333   m_useTransformSkipFast = useTransformSkipFast;
1334 }
1335 
1336 
transformNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,TCoeff * rpcCoeff,TCoeff * pcArlCoeff,TCoeff & uiAbsSum,const QpParam & cQP)1337 Void TComTrQuant::transformNxN(       TComTU        & rTu,
1338                                 const ComponentID     compID,
1339                                       Pel          *  pcResidual,
1340                                 const UInt            uiStride,
1341                                       TCoeff       *  rpcCoeff,
1342 #if ADAPTIVE_QP_SELECTION
1343                                       TCoeff       *  pcArlCoeff,
1344 #endif
1345                                       TCoeff        & uiAbsSum,
1346                                 const QpParam       & cQP
1347                               )
1348 {
1349   const TComRectangle &rect = rTu.getRect(compID);
1350   const UInt uiWidth        = rect.width;
1351   const UInt uiHeight       = rect.height;
1352   TComDataCU* pcCU          = rTu.getCU();
1353   const UInt uiAbsPartIdx   = rTu.GetAbsPartIdxTU();
1354   const UInt uiOrgTrDepth   = rTu.GetTransformDepthRel();
1355 
1356   uiAbsSum=0;
1357 
1358   RDPCMMode rdpcmMode = RDPCM_OFF;
1359   rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
1360 
1361   if (rdpcmMode == RDPCM_OFF)
1362   {
1363     uiAbsSum = 0;
1364     //transform and quantise
1365     if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1366     {
1367       const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1368       const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;
1369 
1370       for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1371       {
1372         for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1373         {
1374           const Pel currentSample = pcResidual[(y * uiStride) + x];
1375 
1376           rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
1377           uiAbsSum += TCoeff(abs(currentSample));
1378         }
1379       }
1380     }
1381     else
1382     {
1383 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1384       std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
1385       printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1386 #endif
1387 
1388       assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1389 
1390       if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
1391       {
1392         xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
1393       }
1394       else
1395       {
1396         xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1397       }
1398 
1399 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1400       std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
1401       printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1402 #endif
1403 
1404       xQuant( rTu, m_plTempCoeff, rpcCoeff,
1405 
1406 #if ADAPTIVE_QP_SELECTION
1407               pcArlCoeff,
1408 #endif
1409               uiAbsSum, compID, cQP );
1410 
1411 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1412       std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
1413       printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
1414 #endif
1415     }
1416   }
1417 
1418     //set the CBF
1419   pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1420 }
1421 
1422 
invTransformNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,TCoeff * pcCoeff,const QpParam & cQP DEBUG_STRING_FN_DECLAREP (psDebug))1423 Void TComTrQuant::invTransformNxN(      TComTU        &rTu,
1424                                   const ComponentID    compID,
1425                                         Pel          *pcResidual,
1426                                   const UInt           uiStride,
1427                                         TCoeff       * pcCoeff,
1428                                   const QpParam       &cQP
1429                                         DEBUG_STRING_FN_DECLAREP(psDebug))
1430 {
1431   TComDataCU* pcCU=rTu.getCU();
1432   const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1433   const TComRectangle &rect = rTu.getRect(compID);
1434   const UInt uiWidth = rect.width;
1435   const UInt uiHeight = rect.height;
1436 
1437   if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
1438   {
1439     //------------------------------------------------
1440 
1441     //recurse deeper
1442 
1443     TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
1444 
1445     do
1446     {
1447       //------------------
1448 
1449       const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
1450 
1451       Pel    *subTUResidual     = pcResidual + (lineOffset * uiStride);
1452       TCoeff *subTUCoefficients = pcCoeff     + (lineOffset * subTURecurse.getRect(compID).width);
1453 
1454       invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
1455 
1456       //------------------
1457 
1458     }
1459     while (subTURecurse.nextSection(rTu));
1460 
1461     //------------------------------------------------
1462 
1463     return;
1464   }
1465 
1466 #if defined DEBUG_STRING
1467   if (psDebug)
1468   {
1469     std::stringstream ss(stringstream::out);
1470     printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
1471     DEBUG_STRING_APPEND((*psDebug), ss.str())
1472   }
1473 #endif
1474 
1475   if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1476   {
1477     const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1478     const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;
1479 
1480     for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1481     {
1482       for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1483       {
1484         pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
1485       }
1486     }
1487   }
1488   else
1489   {
1490 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1491     std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
1492     printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
1493 #endif
1494 
1495     xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
1496 
1497 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1498     std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
1499     printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1500 #endif
1501 
1502 #if defined DEBUG_STRING
1503     if (psDebug)
1504     {
1505       std::stringstream ss(stringstream::out);
1506       printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1507       (*psDebug)+=ss.str();
1508     }
1509 #endif
1510 
1511     if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
1512     {
1513       xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
1514 
1515 #if defined DEBUG_STRING
1516       if (psDebug)
1517       {
1518         std::stringstream ss(stringstream::out);
1519         printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1520         (*psDebug)+=ss.str();
1521         (*psDebug)+="(<- was a Transform-skipped block)\n";
1522       }
1523 #endif
1524     }
1525     else
1526     {
1527       xIT( compID, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight );
1528 
1529 #if defined DEBUG_STRING
1530       if (psDebug)
1531       {
1532         std::stringstream ss(stringstream::out);
1533         printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1534         (*psDebug)+=ss.str();
1535         (*psDebug)+="(<- was a Transformed block)\n";
1536       }
1537 #endif
1538     }
1539 
1540 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1541     std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
1542     printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1543     g_debugCounter++;
1544 #endif
1545   }
1546 
1547   invRdpcmNxN( rTu, compID, pcResidual, uiStride );
1548 }
1549 
invRecurTransformNxN(const ComponentID compID,TComYuv * pResidual,TComTU & rTu)1550 Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
1551                                         TComYuv *pResidual,
1552                                         TComTU &rTu)
1553 {
1554   if (!rTu.ProcessComponentSection(compID)) return;
1555 
1556   TComDataCU* pcCU = rTu.getCU();
1557   UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
1558   UInt uiTrMode=rTu.GetTransformDepthRel();
1559   if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) || !pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) )
1560   {
1561     return;
1562   }
1563 
1564   if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
1565   {
1566     const TComRectangle &tuRect      = rTu.getRect(compID);
1567     const Int            uiStride    = pResidual->getStride( compID );
1568           Pel           *rpcResidual = pResidual->getAddr( compID );
1569           UInt           uiAddr      = (tuRect.x0 + uiStride*tuRect.y0);
1570           Pel           *pResi       = rpcResidual + uiAddr;
1571           TCoeff        *pcCoeff     = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
1572 
1573     const QpParam cQP(*pcCU, compID);
1574 
1575     if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
1576     {
1577       DEBUG_STRING_NEW(sTemp)
1578 #ifdef DEBUG_STRING
1579       std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
1580 #endif
1581 
1582       invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
1583 
1584 #ifdef DEBUG_STRING
1585       if (psDebug != 0)
1586         std::cout << (*psDebug);
1587 #endif
1588     }
1589 
1590     if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
1591     {
1592       const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
1593       const Int  strideLuma = pResidual->getStride( COMPONENT_Y );
1594       const Int  tuWidth    = rTu.getRect( compID ).width;
1595       const Int  tuHeight   = rTu.getRect( compID ).height;
1596 
1597       if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
1598       {
1599         pResi = rpcResidual + uiAddr;
1600         const Pel *pResiLuma = piResiLuma + uiAddr;
1601 
1602         crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
1603       }
1604     }
1605   }
1606   else
1607   {
1608     TComTURecurse tuRecurseChild(rTu, false);
1609     do
1610     {
1611       invRecurTransformNxN( compID, pResidual, tuRecurseChild );
1612     }
1613     while (tuRecurseChild.nextSection(rTu));
1614   }
1615 }
1616 
applyForwardRDPCM(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,const QpParam & cQP,TCoeff * pcCoeff,TCoeff & uiAbsSum,const RDPCMMode mode)1617 Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
1618 {
1619   TComDataCU *pcCU=rTu.getCU();
1620   const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1621 
1622   const Bool bLossless      = pcCU->getCUTransquantBypass( uiAbsPartIdx );
1623   const UInt uiWidth        = rTu.getRect(compID).width;
1624   const UInt uiHeight       = rTu.getRect(compID).height;
1625   const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1626   const UInt uiSizeMinus1   = (uiWidth * uiHeight) - 1;
1627 
1628   Pel reconstructedResi[MAX_TU_SIZE * MAX_TU_SIZE];
1629 
1630   UInt uiX = 0;
1631   UInt uiY = 0;
1632 
1633         UInt &majorAxis             = (mode == RDPCM_HOR) ? uiX      : uiY;
1634         UInt &minorAxis             = (mode == RDPCM_HOR) ? uiY      : uiX;
1635   const UInt  majorAxisLimit        = (mode == RDPCM_HOR) ? uiWidth  : uiHeight;
1636   const UInt  minorAxisLimit        = (mode == RDPCM_HOR) ? uiHeight : uiWidth;
1637   const UInt  referenceSampleOffset = (mode == RDPCM_HOR) ? 1        : uiWidth;
1638 
1639   const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
1640 
1641   uiAbsSum = 0;
1642 
1643   for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
1644   {
1645     for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
1646     {
1647       const UInt sampleIndex      = (uiY * uiWidth) + uiX;
1648       const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
1649       const Pel  currentSample    = pcResidual[(uiY * uiStride) + uiX];
1650       const Pel  referenceSample  = ((mode != RDPCM_OFF) && (majorAxis > 0)) ? reconstructedResi[sampleIndex - referenceSampleOffset] : 0;
1651 
1652       const Pel  encoderSideDelta = currentSample - referenceSample;
1653 
1654       Pel reconstructedDelta;
1655       if ( bLossless )
1656       {
1657         pcCoeff[coefficientIndex] = encoderSideDelta;
1658         reconstructedDelta        = encoderSideDelta;
1659       }
1660       else
1661       {
1662         transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
1663         invTrSkipDeQuantOneSample  (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
1664       }
1665 
1666       uiAbsSum += abs(pcCoeff[coefficientIndex]);
1667 
1668       reconstructedResi[sampleIndex] = reconstructedDelta + referenceSample;
1669     }
1670   }
1671 }
1672 
rdpcmNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,const QpParam & cQP,TCoeff * pcCoeff,TCoeff & uiAbsSum,RDPCMMode & rdpcmMode)1673 Void TComTrQuant::rdpcmNxN   ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
1674 {
1675   TComDataCU *pcCU=rTu.getCU();
1676   const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1677 
1678   if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) || ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1679   {
1680     rdpcmMode = RDPCM_OFF;
1681   }
1682   else if ( pcCU->isIntra( uiAbsPartIdx ) )
1683   {
1684     const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
1685     const ChannelType chType = toChannelType(compID);
1686     const UInt uiChPredMode  = pcCU->getIntraDir( chType, uiAbsPartIdx );
1687     const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1688     const UInt uiChFinalMode = ((chFmt == CHROMA_422)       && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1689 
1690     if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX)
1691     {
1692       rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1693       applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
1694     }
1695     else rdpcmMode = RDPCM_OFF;
1696   }
1697   else // not intra, need to select the best mode
1698   {
1699     const UInt uiWidth  = rTu.getRect(compID).width;
1700     const UInt uiHeight = rTu.getRect(compID).height;
1701 
1702     RDPCMMode bestMode   = NUMBER_OF_RDPCM_MODES;
1703     TCoeff    bestAbsSum = std::numeric_limits<TCoeff>::max();
1704     TCoeff    bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
1705 
1706     for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
1707     {
1708       const RDPCMMode mode = RDPCMMode(modeIndex);
1709 
1710       TCoeff currAbsSum = 0;
1711 
1712       applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
1713 
1714       if (currAbsSum < bestAbsSum)
1715       {
1716         bestMode   = mode;
1717         bestAbsSum = currAbsSum;
1718         if (mode != RDPCM_OFF)
1719         {
1720           memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
1721         }
1722       }
1723     }
1724 
1725     rdpcmMode = bestMode;
1726     uiAbsSum  = bestAbsSum;
1727 
1728     if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
1729     {
1730       memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
1731     }
1732   }
1733 
1734   pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1735 }
1736 
invRdpcmNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride)1737 Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
1738 {
1739   TComDataCU *pcCU=rTu.getCU();
1740   const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1741 
1742   if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) || pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1743   {
1744     const UInt uiWidth  = rTu.getRect(compID).width;
1745     const UInt uiHeight = rTu.getRect(compID).height;
1746 
1747     RDPCMMode rdpcmMode = RDPCM_OFF;
1748 
1749     if ( pcCU->isIntra( uiAbsPartIdx ) )
1750     {
1751       const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
1752       const ChannelType chType = toChannelType(compID);
1753       const UInt uiChPredMode  = pcCU->getIntraDir( chType, uiAbsPartIdx );
1754       const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1755       const UInt uiChFinalMode = ((chFmt == CHROMA_422)       && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1756 
1757       if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX)
1758       {
1759         rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1760       }
1761     }
1762     else  // not intra case
1763     {
1764       rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
1765     }
1766 
1767     if (rdpcmMode == RDPCM_VER)
1768     {
1769       pcResidual += uiStride; //start from row 1
1770 
1771       for( UInt uiY = 1; uiY < uiHeight; uiY++ )
1772       {
1773         for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1774         {
1775           pcResidual[ uiX ] = pcResidual[ uiX ] + pcResidual [ (Int)uiX - (Int)uiStride ];
1776         }
1777         pcResidual += uiStride;
1778       }
1779     }
1780     else if (rdpcmMode == RDPCM_HOR)
1781     {
1782       for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1783       {
1784         for( UInt uiX = 1; uiX < uiWidth; uiX++ )
1785         {
1786           pcResidual[ uiX ] = pcResidual[ uiX ] + pcResidual [ (Int)uiX-1 ];
1787         }
1788         pcResidual += uiStride;
1789       }
1790     }
1791   }
1792 }
1793 
1794 // ------------------------------------------------------------------------------------------------
1795 // Logical transform
1796 // ------------------------------------------------------------------------------------------------
1797 
/** Wrapper function between HM interface and core NxN forward transform (2D)
 *  \param compID colour component of the block; its channel type selects the bit depth and maximum transform dynamic range
 *  \param useDST flag forwarded to the core transform (DCT/DST selection)
 *  \param piBlkResi input data (residual)
 *  \param uiStride stride of input residual data
 *  \param psCoeff output data (transform coefficients)
 *  \param iWidth transform width
 *  \param iHeight transform height
 */
xT(const ComponentID compID,Bool useDST,Pel * piBlkResi,UInt uiStride,TCoeff * psCoeff,Int iWidth,Int iHeight)1805 Void TComTrQuant::xT( const ComponentID compID, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight )
1806 {
1807 #if MATRIX_MULT
1808   if( iWidth == iHeight)
1809   {
1810     xTr(g_bitDepth[toChannelType(compID)], piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1811     return;
1812   }
1813 #endif
1814 
1815   TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1816   TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1817 
1818   for (Int y = 0; y < iHeight; y++)
1819     for (Int x = 0; x < iWidth; x++)
1820     {
1821       block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
1822     }
1823 
1824   xTrMxN( g_bitDepth[toChannelType(compID)], block, coeff, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1825 
1826   memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
1827 }
1828 
/** Wrapper function between HM interface and core NxN inverse transform (2D)
 *  \param compID colour component of the block; its channel type selects the bit depth and maximum transform dynamic range
 *  \param useDST flag forwarded to the core inverse transform (DCT/DST selection)
 *  \param plCoef input data (transform coefficients)
 *  \param pResidual output data (residual)
 *  \param uiStride stride of output residual data
 *  \param iWidth transform width
 *  \param iHeight transform height
 */
xIT(const ComponentID compID,Bool useDST,TCoeff * plCoef,Pel * pResidual,UInt uiStride,Int iWidth,Int iHeight)1836 Void TComTrQuant::xIT( const ComponentID compID, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1837 {
1838 #if MATRIX_MULT
1839   if( iWidth == iHeight )
1840   {
1841 #if O0043_BEST_EFFORT_DECODING
1842     xITr(g_bitDepthInStream[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1843 #else
1844     xITr(g_bitDepth[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1845 #endif
1846     return;
1847   }
1848 #endif
1849 
1850   TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1851   TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1852 
1853   memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
1854 
1855 #if O0043_BEST_EFFORT_DECODING
1856   xITrMxN( g_bitDepthInStream[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1857 #else
1858   xITrMxN( g_bitDepth[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1859 #endif
1860 
1861   for (Int y = 0; y < iHeight; y++)
1862     for (Int x = 0; x < iWidth; x++)
1863     {
1864       pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
1865     }
1866 }
1867 
/** Wrapper function between HM interface and core NxN forward transform skipping
 *  \param piBlkResi input data (residual)
 *  \param uiStride stride of input residual data
 *  \param psCoeff output data (transform coefficients)
 *  \param rTu transform unit; provides the block width and height, the transform shift and the residual rotation flag
 *  \param component colour component of the block
 */
xTransformSkip(Pel * piBlkResi,UInt uiStride,TCoeff * psCoeff,TComTU & rTu,const ComponentID component)1874 Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
1875 {
1876   const TComRectangle &rect = rTu.getRect(component);
1877   const Int width           = rect.width;
1878   const Int height          = rect.height;
1879 
1880   Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1881   if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1882   {
1883     iTransformShift = std::max<Int>(0, iTransformShift);
1884   }
1885 
1886   const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1887   const UInt uiSizeMinus1   = (width * height) - 1;
1888 
1889   if (iTransformShift >= 0)
1890   {
1891     for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1892     {
1893       for (UInt x = 0; x < width; x++, coefficientIndex++)
1894       {
1895         psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
1896       }
1897     }
1898   }
1899   else //for very high bit depths
1900   {
1901     iTransformShift = -iTransformShift;
1902     const TCoeff offset = 1 << (iTransformShift - 1);
1903 
1904     for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1905     {
1906       for (UInt x = 0; x < width; x++, coefficientIndex++)
1907       {
1908         psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
1909       }
1910     }
1911   }
1912 }
1913 
/** Wrapper function between HM interface and core NxN inverse transform skipping
 *  \param plCoef input data (coefficients)
 *  \param pResidual output data (residual)
 *  \param uiStride stride of output residual data
 *  \param rTu transform unit; provides the block width and height, the transform shift and the residual rotation flag
 *  \param component colour component of the block
 */
xITransformSkip(TCoeff * plCoef,Pel * pResidual,UInt uiStride,TComTU & rTu,const ComponentID component)1920 Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
1921 {
1922   const TComRectangle &rect = rTu.getRect(component);
1923   const Int width           = rect.width;
1924   const Int height          = rect.height;
1925 
1926   Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1927   if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1928   {
1929     iTransformShift = std::max<Int>(0, iTransformShift);
1930   }
1931 
1932   const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1933   const UInt uiSizeMinus1   = (width * height) - 1;
1934 
1935   if (iTransformShift >= 0)
1936   {
1937     const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
1938 
1939     for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1940     {
1941       for (UInt x = 0; x < width; x++, coefficientIndex++)
1942       {
1943         pResidual[(y * uiStride) + x] =  Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
1944       }
1945     }
1946   }
1947   else //for very high bit depths
1948   {
1949     iTransformShift = -iTransformShift;
1950 
1951     for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1952     {
1953       for (UInt x = 0; x < width; x++, coefficientIndex++)
1954       {
1955         pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
1956       }
1957     }
1958   }
1959 }
1960 
/** RDOQ with CABAC
 * \param rTu transform unit to quantise; the coding unit, block width and height and absolute partition index are taken from it
 * \param plSrcCoeff pointer to input buffer
 * \param piDstCoeff pointer to output buffer
 * \param piArlDstCoeff pointer to the adaptive QP selection output buffer (parameter only present when ADAPTIVE_QP_SELECTION is enabled)
 * \param uiAbsSum reference to absolute sum of quantized transform coefficient
 * \param compID colour component of the block (luminance or chrominance)
 * \param cQP quantisation parameter of the block
 * \returns Void
 * Rate distortion optimized quantization for entropy
 * coding engines using probability models like CABAC
 */
xRateDistOptQuant(TComTU & rTu,TCoeff * plSrcCoeff,TCoeff * piDstCoeff,TCoeff * piArlDstCoeff,TCoeff & uiAbsSum,const ComponentID compID,const QpParam & cQP)1974 Void TComTrQuant::xRateDistOptQuant                 (       TComTU       &rTu,
1975                                                             TCoeff      * plSrcCoeff,
1976                                                             TCoeff      * piDstCoeff,
1977 #if ADAPTIVE_QP_SELECTION
1978                                                             TCoeff      * piArlDstCoeff,
1979 #endif
1980                                                             TCoeff       &uiAbsSum,
1981                                                       const ComponentID   compID,
1982                                                       const QpParam      &cQP  )
1983 {
1984   const TComRectangle  & rect             = rTu.getRect(compID);
1985   const UInt             uiWidth          = rect.width;
1986   const UInt             uiHeight         = rect.height;
1987         TComDataCU    *  pcCU             = rTu.getCU();
1988   const UInt             uiAbsPartIdx     = rTu.GetAbsPartIdxTU();
1989   const ChannelType      channelType      = toChannelType(compID);
1990   const UInt             uiLog2TrSize     = rTu.GetEquivalentLog2TrSize(compID);
1991 
1992   const Bool             extendedPrecision = pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1993 
1994   /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1995    * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1996    * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1997    * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1998    */
1999 
2000   // Represents scaling through forward transform
2001   Int iTransformShift            = getTransformShift(channelType, uiLog2TrSize);
2002   if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
2003   {
2004     iTransformShift = std::max<Int>(0, iTransformShift);
2005   }
2006 
2007   const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getUseGolombRiceParameterAdaptation();
2008   const UInt initialGolombRiceParameter        = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
2009         UInt uiGoRiceParam                     = initialGolombRiceParameter;
2010   Double     d64BlockUncodedCost               = 0;
2011   const UInt uiLog2BlockWidth                  = g_aucConvertToBit[ uiWidth  ] + 2;
2012   const UInt uiLog2BlockHeight                 = g_aucConvertToBit[ uiHeight ] + 2;
2013   const UInt uiMaxNumCoeff                     = uiWidth * uiHeight;
2014   assert(compID<MAX_NUM_COMPONENT);
2015 
2016   Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
2017   assert(scalingListType < SCALING_LIST_NUM);
2018 
2019 #if ADAPTIVE_QP_SELECTION
2020   memset(piArlDstCoeff, 0, sizeof(TCoeff) *  uiMaxNumCoeff);
2021 #endif
2022 
2023   Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
2024   Double pdCostSig   [ MAX_TU_SIZE * MAX_TU_SIZE ];
2025   Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
2026   memset( pdCostCoeff, 0, sizeof(Double) *  uiMaxNumCoeff );
2027   memset( pdCostSig,   0, sizeof(Double) *  uiMaxNumCoeff );
2028   Int rateIncUp   [ MAX_TU_SIZE * MAX_TU_SIZE ];
2029   Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
2030   Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
2031   TCoeff deltaU   [ MAX_TU_SIZE * MAX_TU_SIZE ];
2032   memset( rateIncUp,    0, sizeof(Int   ) *  uiMaxNumCoeff );
2033   memset( rateIncDown,  0, sizeof(Int   ) *  uiMaxNumCoeff );
2034   memset( sigRateDelta, 0, sizeof(Int   ) *  uiMaxNumCoeff );
2035   memset( deltaU,       0, sizeof(TCoeff) *  uiMaxNumCoeff );
2036 
2037   const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
2038   const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
2039   const Int    *const piQCoef    = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
2040 
2041   const Bool   enableScalingLists             = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
2042   const Int    defaultQuantisationCoefficient = g_quantScales[cQP.rem];
2043   const Double defaultErrorScale              = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
2044 
2045   const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
2046   const TCoeff entropyCodingMaximum =  (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
2047 
2048 #if ADAPTIVE_QP_SELECTION
2049   Int iQBitsC = iQBits - ARL_C_PRECISION;
2050   Int iAddC =  1 << (iQBitsC-1);
2051 #endif
2052 
2053   TUEntropyCodingParameters codingParameters;
2054   getTUEntropyCodingParameters(codingParameters, rTu, compID);
2055   const UInt uiCGSize = (1 << MLS_CG_SIZE);
2056 
2057   Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2058   UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
2059   Int iCGLastScanPos = -1;
2060 
2061   UInt    uiCtxSet            = 0;
2062   Int     c1                  = 1;
2063   Int     c2                  = 0;
2064   Double  d64BaseCost         = 0;
2065   Int     iLastScanPos        = -1;
2066 
2067   UInt    c1Idx     = 0;
2068   UInt    c2Idx     = 0;
2069   Int     baseLevel;
2070 
2071   memset( pdCostCoeffGroupSig,   0, sizeof(Double) * MLS_GRP_NUM );
2072   memset( uiSigCoeffGroupFlag,   0, sizeof(UInt) * MLS_GRP_NUM );
2073 
2074   UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2075   Int iScanPos;
2076   coeffGroupRDStats rdStats;
2077 
2078   const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
2079 
2080   for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2081   {
2082     UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2083     UInt uiCGPosY   = uiCGBlkPos / codingParameters.widthInGroups;
2084     UInt uiCGPosX   = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
2085 
2086     memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2087 
2088     const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
2089 
2090     for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2091     {
2092       iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2093       //===== quantization =====
2094       UInt    uiBlkPos          = codingParameters.scan[iScanPos];
2095       // set coeff
2096 
2097       const Int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos] : defaultQuantisationCoefficient;
2098       const Double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
2099 
2100       const Int64  tmpLevel                = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
2101 
2102       const Intermediate_Int lLevelDouble  = (Intermediate_Int)min<Int64>(tmpLevel, MAX_INTERMEDIATE_INT - (Intermediate_Int(1) << (iQBits - 1)));
2103 
2104 #if ADAPTIVE_QP_SELECTION
2105       if( m_bUseAdaptQpSelect )
2106       {
2107         piArlDstCoeff[uiBlkPos]   = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
2108       }
2109 #endif
2110       const UInt uiMaxAbsLevel  = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
2111 
2112       const Double dErr         = Double( lLevelDouble );
2113       pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
2114       d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
2115       piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
2116 
2117       if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2118       {
2119         iLastScanPos            = iScanPos;
2120         uiCtxSet                = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
2121         iCGLastScanPos          = iCGScanPos;
2122       }
2123 
2124       if ( iLastScanPos >= 0 )
2125       {
2126         //===== coefficient level estimation =====
2127         UInt  uiLevel;
2128         UInt  uiOneCtx         = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
2129         UInt  uiAbsCtx         = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
2130 
2131         if( iScanPos == iLastScanPos )
2132         {
2133           uiLevel              = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2134                                                   lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2135                                                   c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, channelType
2136                                                   );
2137         }
2138         else
2139         {
2140           UShort uiCtxSig      = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
2141 
2142           uiLevel              = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2143                                                   lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2144                                                   c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, channelType
2145                                                   );
2146 
2147           sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2148         }
2149 
2150         deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
2151 
2152         if( uiLevel > 0 )
2153         {
2154           Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType );
2155           rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2156           rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2157         }
2158         else // uiLevel == 0
2159         {
2160           rateIncUp   [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2161         }
2162         piDstCoeff[ uiBlkPos ] = uiLevel;
2163         d64BaseCost           += pdCostCoeff [ iScanPos ];
2164 
2165         baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2166         if( uiLevel >= baseLevel )
2167         {
2168           if (uiLevel > 3*(1<<uiGoRiceParam))
2169           {
2170             uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
2171           }
2172         }
2173         if ( uiLevel >= 1)
2174         {
2175           c1Idx ++;
2176         }
2177 
2178         //===== update bin model =====
2179         if( uiLevel > 1 )
2180         {
2181           c1 = 0;
2182           c2 += (c2 < 2);
2183           c2Idx ++;
2184         }
2185         else if( (c1 < 3) && (c1 > 0) && uiLevel)
2186         {
2187           c1++;
2188         }
2189 
2190         //===== context set update =====
2191         if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
2192         {
2193           uiCtxSet          = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this **before** entering the final group
2194           c1                = 1;
2195           c2                = 0;
2196           c1Idx             = 0;
2197           c2Idx             = 0;
2198           uiGoRiceParam     = initialGolombRiceParameter;
2199         }
2200       }
2201       else
2202       {
2203         d64BaseCost    += pdCostCoeff0[ iScanPos ];
2204       }
2205       rdStats.d64SigCost += pdCostSig[ iScanPos ];
2206       if (iScanPosinCG == 0 )
2207       {
2208         rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2209       }
2210       if (piDstCoeff[ uiBlkPos ] )
2211       {
2212         uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2213         rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2214         rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2215         if ( iScanPosinCG != 0 )
2216         {
2217           rdStats.iNNZbeforePos0++;
2218         }
2219       }
2220     } //end for (iScanPosinCG)
2221 
2222     if (iCGLastScanPos >= 0)
2223     {
2224       if( iCGScanPos )
2225       {
2226         if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2227         {
2228           UInt  uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2229           d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2230           pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2231         }
2232         else
2233         {
2234           if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2235           {
2236             if ( rdStats.iNNZbeforePos0 == 0 )
2237             {
2238               d64BaseCost -= rdStats.d64SigCost_0;
2239               rdStats.d64SigCost -= rdStats.d64SigCost_0;
2240             }
2241             // rd-cost if SigCoeffGroupFlag = 0, initialization
2242             Double d64CostZeroCG = d64BaseCost;
2243 
2244             // add SigCoeffGroupFlag cost to total cost
2245             UInt  uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2246 
2247             if (iCGScanPos < iCGLastScanPos)
2248             {
2249               d64BaseCost  += xGetRateSigCoeffGroup(1, uiCtxSig);
2250               d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2251               pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2252             }
2253 
2254             // try to convert the current coeff group from non-zero to all-zero
2255             d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
2256             d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
2257             d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels
2258 
2259             // if we can save cost, change this block to all-zero block
2260             if ( d64CostZeroCG < d64BaseCost )
2261             {
2262               uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2263               d64BaseCost = d64CostZeroCG;
2264               if (iCGScanPos < iCGLastScanPos)
2265               {
2266                 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2267               }
2268               // reset coeffs to 0 in this block
2269               for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2270               {
2271                 iScanPos      = iCGScanPos*uiCGSize + iScanPosinCG;
2272                 UInt uiBlkPos = codingParameters.scan[ iScanPos ];
2273 
2274                 if (piDstCoeff[ uiBlkPos ])
2275                 {
2276                   piDstCoeff [ uiBlkPos ] = 0;
2277                   pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2278                   pdCostSig  [ iScanPos ] = 0;
2279                 }
2280               }
2281             } // end if ( d64CostAllZeros < d64BaseCost )
2282           }
2283         } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2284       }
2285       else
2286       {
2287         uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2288       }
2289     }
2290   } //end for (iCGScanPos)
2291 
2292   //===== estimate last position =====
2293   if ( iLastScanPos < 0 )
2294   {
2295     return;
2296   }
2297 
2298   Double  d64BestCost         = 0;
2299   Int     ui16CtxCbf          = 0;
2300   Int     iBestLastIdxP1      = 0;
2301   if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2302   {
2303     ui16CtxCbf   = 0;
2304     d64BestCost  = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2305     d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2306   }
2307   else
2308   {
2309     ui16CtxCbf   = pcCU->getCtxQtCbf( rTu, channelType );
2310     ui16CtxCbf  += getCBFContextOffset(compID);
2311     d64BestCost  = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2312     d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2313   }
2314 
2315 
2316   Bool bFoundLast = false;
2317   for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2318   {
2319     UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2320 
2321     d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2322     if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2323     {
2324       for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2325       {
2326         iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2327 
2328         if (iScanPos > iLastScanPos) continue;
2329         UInt   uiBlkPos     = codingParameters.scan[iScanPos];
2330 
2331         if( piDstCoeff[ uiBlkPos ] )
2332         {
2333           UInt   uiPosY       = uiBlkPos >> uiLog2BlockWidth;
2334           UInt   uiPosX       = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
2335 
2336           Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
2337           Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2338 
2339           if( totalCost < d64BestCost )
2340           {
2341             iBestLastIdxP1  = iScanPos + 1;
2342             d64BestCost     = totalCost;
2343           }
2344           if( piDstCoeff[ uiBlkPos ] > 1 )
2345           {
2346             bFoundLast = true;
2347             break;
2348           }
2349           d64BaseCost      -= pdCostCoeff[ iScanPos ];
2350           d64BaseCost      += pdCostCoeff0[ iScanPos ];
2351         }
2352         else
2353         {
2354           d64BaseCost      -= pdCostSig[ iScanPos ];
2355         }
2356       } //end for
2357       if (bFoundLast)
2358       {
2359         break;
2360       }
2361     } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2362   } // end for
2363 
2364 
2365   for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2366   {
2367     Int blkPos = codingParameters.scan[ scanPos ];
2368     TCoeff level = piDstCoeff[ blkPos ];
2369     uiAbsSum += level;
2370     piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2371   }
2372 
2373   //===== clean uncoded coefficients =====
2374   for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2375   {
2376     piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
2377   }
2378 
2379 
2380   if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2381   {
2382     const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
2383     Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
2384                              / m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(g_bitDepth[channelType] - 8)))
2385                              + 0.5);
2386 
2387     Int lastCG = -1;
2388     Int absSum = 0 ;
2389     Int n ;
2390 
2391     for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
2392     {
2393       Int  subPos     = subSet << MLS_CG_SIZE;
2394       Int  firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
2395       absSum = 0 ;
2396 
2397       for(n = uiCGSize-1; n >= 0; --n )
2398       {
2399         if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2400         {
2401           lastNZPosInCG = n;
2402           break;
2403         }
2404       }
2405 
2406       for(n = 0; n <uiCGSize; n++ )
2407       {
2408         if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2409         {
2410           firstNZPosInCG = n;
2411           break;
2412         }
2413       }
2414 
2415       for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2416       {
2417         absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
2418       }
2419 
2420       if(lastNZPosInCG>=0 && lastCG==-1)
2421       {
2422         lastCG = 1;
2423       }
2424 
2425       if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2426       {
2427         UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
2428         if( signbit!=(absSum&0x1) )  // hide but need tune
2429         {
2430           // calculate the cost
2431           Int64 minCostInc = MAX_INT64, curCost = MAX_INT64;
2432           Int minPos = -1, finalChange = 0, curChange = 0;
2433 
2434           for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
2435           {
2436             UInt uiBlkPos   = codingParameters.scan[ n + subPos ];
2437             if(piDstCoeff[ uiBlkPos ] != 0 )
2438             {
2439               Int64 costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
2440               Int64 costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2441                                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
2442 
2443               if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2444               {
2445                 costDown -= (4<<15);
2446               }
2447 
2448               if(costUp<costDown)
2449               {
2450                 curCost = costUp;
2451                 curChange =  1;
2452               }
2453               else
2454               {
2455                 curChange = -1;
2456                 if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2457                 {
2458                   curCost = MAX_INT64;
2459                 }
2460                 else
2461                 {
2462                   curCost = costDown;
2463                 }
2464               }
2465             }
2466             else
2467             {
2468               curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2469               curChange = 1 ;
2470 
2471               if(n<firstNZPosInCG)
2472               {
2473                 UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2474                 if(thissignbit != signbit )
2475                 {
2476                   curCost = MAX_INT64;
2477                 }
2478               }
2479             }
2480 
2481             if( curCost<minCostInc)
2482             {
2483               minCostInc = curCost;
2484               finalChange = curChange;
2485               minPos = uiBlkPos;
2486             }
2487           }
2488 
2489           if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
2490           {
2491             finalChange = -1;
2492           }
2493 
2494           if(plSrcCoeff[minPos]>=0)
2495           {
2496             piDstCoeff[minPos] += finalChange ;
2497           }
2498           else
2499           {
2500             piDstCoeff[minPos] -= finalChange ;
2501           }
2502         }
2503       }
2504 
2505       if(lastCG==1)
2506       {
2507         lastCG=0 ;
2508       }
2509     }
2510   }
2511 }
2512 
2513 
2514 /** Pattern decision for context derivation process of significant_coeff_flag
2515  * \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2516  * \param uiCGPosX column of current coefficient group
2517  * \param uiCGPosY row of current coefficient group
2518  * \param width width of the block
2519  * \param height height of the block
2520  * \returns pattern for current coefficient group
2521  */
calcPatternSigCtx(const UInt * sigCoeffGroupFlag,UInt uiCGPosX,UInt uiCGPosY,UInt widthInGroups,UInt heightInGroups)2522 Int  TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
2523 {
2524   if ((widthInGroups <= 1) && (heightInGroups <= 1)) return 0;
2525 
2526   const Bool rightAvailable = uiCGPosX < (widthInGroups  - 1);
2527   const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
2528 
2529   UInt sigRight = 0;
2530   UInt sigLower = 0;
2531 
2532   if (rightAvailable) sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2533   if (belowAvailable) sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2534 
2535   return sigRight + (sigLower << 1);
2536 }
2537 
2538 
2539 /** Context derivation process of coeff_abs_significant_flag
2540  * \param patternSigCtx pattern for current coefficient group
2541  * \param codingParameters coding parmeters for the TU (includes the scan)
2542  * \param scanPosition current position in scan order
2543  * \param log2BlockWidth log2 width of the block
2544  * \param log2BlockHeight log2 height of the block
2545  * \param ChannelType channel type (CHANNEL_TYPE_LUMA/CHROMA)
2546  * \returns ctxInc for current scan position
2547  */
getSigCtxInc(Int patternSigCtx,const TUEntropyCodingParameters & codingParameters,const Int scanPosition,const Int log2BlockWidth,const Int log2BlockHeight,const ChannelType chanType)2548 Int TComTrQuant::getSigCtxInc    (       Int                        patternSigCtx,
2549                                    const TUEntropyCodingParameters &codingParameters,
2550                                    const Int                        scanPosition,
2551                                    const Int                        log2BlockWidth,
2552                                    const Int                        log2BlockHeight,
2553                                    const ChannelType                chanType)
2554 {
2555   if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
2556   {
2557     //single context mode
2558     return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
2559   }
2560 
2561   const UInt rasterPosition = codingParameters.scan[scanPosition];
2562   const UInt posY           = rasterPosition >> log2BlockWidth;
2563   const UInt posX           = rasterPosition - (posY << log2BlockWidth);
2564 
2565   if ((posX + posY) == 0) return 0; //special case for the DC context variable
2566 
2567   Int offset = MAX_INT;
2568 
2569   if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
2570   {
2571     offset = ctxIndMap4x4[ (4 * posY) + posX ];
2572   }
2573   else
2574   {
2575     Int cnt = 0;
2576 
2577     switch (patternSigCtx)
2578     {
2579       //------------------
2580 
2581       case 0: //neither neighbouring group is significant
2582         {
2583           const Int posXinSubset     = posX & ((1 << MLS_CG_LOG2_WIDTH)  - 1);
2584           const Int posYinSubset     = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2585           const Int posTotalInSubset = posXinSubset + posYinSubset;
2586 
2587           //first N coefficients in scan order use 2; the next few use 1; the rest use 0.
2588           const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
2589           const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
2590 
2591           cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
2592         }
2593         break;
2594 
2595       //------------------
2596 
2597       case 1: //right group is significant, below is not
2598         {
2599           const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2600           const Int groupHeight  = 1 << MLS_CG_LOG2_HEIGHT;
2601 
2602           cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
2603         }
2604         break;
2605 
2606       //------------------
2607 
2608       case 2: //below group is significant, right is not
2609         {
2610           const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH)  - 1);
2611           const Int groupWidth   = 1 << MLS_CG_LOG2_WIDTH;
2612 
2613           cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
2614         }
2615         break;
2616 
2617       //------------------
2618 
2619       case 3: //both neighbouring groups are significant
2620         {
2621           cnt = 2;
2622         }
2623         break;
2624 
2625       //------------------
2626 
2627       default:
2628         std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
2629         exit(1);
2630         break;
2631     }
2632 
2633     //------------------------------------------------
2634 
2635     const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
2636 
2637     offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
2638   }
2639 
2640   return codingParameters.firstSignificanceMapContext + offset;
2641 }
2642 
2643 
2644 /** Get the best level in RD sense
2645  * \param rd64CodedCost reference to coded cost
2646  * \param rd64CodedCost0 reference to cost when coefficient is 0
2647  * \param rd64CodedCostSig reference to cost of significant coefficient
2648  * \param lLevelDouble reference to unscaled quantized level
2649  * \param uiMaxAbsLevel scaled quantized level
2650  * \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2651  * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2652  * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2653  * \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2654  * \param iQBits quantization step size
2655  * \param dTemp correction factor
2656  * \param bLast indicates if the coefficient is the last significant
2657  * \returns best quantized transform level for given scan position
2658  * This method calculates the best quantized transform level for a given scan position.
2659  */
xGetCodedLevel(Double & rd64CodedCost,Double & rd64CodedCost0,Double & rd64CodedCostSig,Intermediate_Int lLevelDouble,UInt uiMaxAbsLevel,UShort ui16CtxNumSig,UShort ui16CtxNumOne,UShort ui16CtxNumAbs,UShort ui16AbsGoRice,UInt c1Idx,UInt c2Idx,Int iQBits,Double errorScale,Bool bLast,Bool useLimitedPrefixLength,ChannelType channelType) const2660 __inline UInt TComTrQuant::xGetCodedLevel ( Double&          rd64CodedCost,
2661                                             Double&          rd64CodedCost0,
2662                                             Double&          rd64CodedCostSig,
2663                                             Intermediate_Int lLevelDouble,
2664                                             UInt             uiMaxAbsLevel,
2665                                             UShort           ui16CtxNumSig,
2666                                             UShort           ui16CtxNumOne,
2667                                             UShort           ui16CtxNumAbs,
2668                                             UShort           ui16AbsGoRice,
2669                                             UInt             c1Idx,
2670                                             UInt             c2Idx,
2671                                             Int              iQBits,
2672                                             Double           errorScale,
2673                                             Bool             bLast,
2674                                             Bool             useLimitedPrefixLength,
2675                                             ChannelType      channelType
2676                                             ) const
2677 {
2678   Double dCurrCostSig   = 0;
2679   UInt   uiBestAbsLevel = 0;
2680 
2681   if( !bLast && uiMaxAbsLevel < 3 )
2682   {
2683     rd64CodedCostSig    = xGetRateSigCoef( 0, ui16CtxNumSig );
2684     rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
2685     if( uiMaxAbsLevel == 0 )
2686     {
2687       return uiBestAbsLevel;
2688     }
2689   }
2690   else
2691   {
2692     rd64CodedCost       = MAX_DOUBLE;
2693   }
2694 
2695   if( !bLast )
2696   {
2697     dCurrCostSig        = xGetRateSigCoef( 1, ui16CtxNumSig );
2698   }
2699 
2700   UInt uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2701   for( Int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2702   {
2703     Double dErr         = Double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
2704     Double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, channelType ) );
2705     dCurrCost          += dCurrCostSig;
2706 
2707     if( dCurrCost < rd64CodedCost )
2708     {
2709       uiBestAbsLevel    = uiAbsLevel;
2710       rd64CodedCost     = dCurrCost;
2711       rd64CodedCostSig  = dCurrCostSig;
2712     }
2713   }
2714 
2715   return uiBestAbsLevel;
2716 }
2717 
2718 /** Calculates the cost for specific absolute transform level
2719  * \param uiAbsLevel scaled quantized level
2720  * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2721  * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2722  * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2723  * \returns cost of given absolute transform level
2724  */
xGetICRate(UInt uiAbsLevel,UShort ui16CtxNumOne,UShort ui16CtxNumAbs,UShort ui16AbsGoRice,UInt c1Idx,UInt c2Idx,Bool useLimitedPrefixLength,ChannelType channelType) const2725 __inline Int TComTrQuant::xGetICRate         ( UInt                            uiAbsLevel,
2726                                                UShort                          ui16CtxNumOne,
2727                                                UShort                          ui16CtxNumAbs,
2728                                                UShort                          ui16AbsGoRice,
2729                                                UInt                            c1Idx,
2730                                                UInt                            c2Idx,
2731                                                Bool                            useLimitedPrefixLength,
2732                                                ChannelType                     channelType
2733                                                ) const
2734 {
2735   Int  iRate      = Int(xGetIEPRate()); // cost of sign bit
2736   UInt baseLevel  = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2737 
2738   if ( uiAbsLevel >= baseLevel )
2739   {
2740     UInt symbol     = uiAbsLevel - baseLevel;
2741     UInt length;
2742     if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2743     {
2744       length = symbol>>ui16AbsGoRice;
2745       iRate += (length+1+ui16AbsGoRice)<< 15;
2746     }
2747     else if (useLimitedPrefixLength)
2748     {
2749       const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + g_maxTrDynamicRange[channelType]));
2750 
2751       UInt prefixLength = 0;
2752       UInt suffix       = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
2753 
2754       while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
2755       {
2756         prefixLength++;
2757       }
2758 
2759       const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (g_maxTrDynamicRange[channelType] - ui16AbsGoRice) : (prefixLength + 1/*separator*/);
2760 
2761       iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
2762     }
2763     else
2764     {
2765       length = ui16AbsGoRice;
2766       symbol  = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2767       while (symbol >= (1<<length))
2768       {
2769         symbol -=  (1<<(length++));
2770       }
2771       iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2772     }
2773 
2774     if (c1Idx < C1FLAG_NUMBER)
2775     {
2776       iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2777 
2778       if (c2Idx < C2FLAG_NUMBER)
2779       {
2780         iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2781       }
2782     }
2783   }
2784   else if( uiAbsLevel == 1 )
2785   {
2786     iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2787   }
2788   else if( uiAbsLevel == 2 )
2789   {
2790     iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2791     iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2792   }
2793   else
2794   {
2795     iRate = 0;
2796   }
2797 
2798   return  iRate;
2799 }
2800 
xGetRateSigCoeffGroup(UShort uiSignificanceCoeffGroup,UShort ui16CtxNumSig) const2801 __inline Double TComTrQuant::xGetRateSigCoeffGroup  ( UShort                    uiSignificanceCoeffGroup,
2802                                                 UShort                          ui16CtxNumSig ) const
2803 {
2804   return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2805 }
2806 
2807 /** Calculates the cost of signaling the last significant coefficient in the block
2808  * \param uiPosX X coordinate of the last significant coefficient
2809  * \param uiPosY Y coordinate of the last significant coefficient
2810  * \returns cost of last significant coefficient
2811  */
2812 /*
2813  * \param uiWidth width of the transform unit (TU)
2814 */
xGetRateLast(const UInt uiPosX,const UInt uiPosY,const ComponentID component) const2815 __inline Double TComTrQuant::xGetRateLast   ( const UInt                      uiPosX,
2816                                               const UInt                      uiPosY,
2817                                               const ComponentID               component  ) const
2818 {
2819   UInt uiCtxX   = g_uiGroupIdx[uiPosX];
2820   UInt uiCtxY   = g_uiGroupIdx[uiPosY];
2821 
2822   Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
2823 
2824   if( uiCtxX > 3 )
2825   {
2826     uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2827   }
2828   if( uiCtxY > 3 )
2829   {
2830     uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2831   }
2832   return xGetICost( uiCost );
2833 }
2834 
2835  /** Calculates the cost for specific absolute transform level
2836  * \param uiAbsLevel scaled quantized level
2837  * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2838  * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2839  * \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2840  * \returns cost of given absolute transform level
2841  */
xGetRateSigCoef(UShort uiSignificance,UShort ui16CtxNumSig) const2842 __inline Double TComTrQuant::xGetRateSigCoef  ( UShort                          uiSignificance,
2843                                                 UShort                          ui16CtxNumSig ) const
2844 {
2845   return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2846 }
2847 
2848 /** Get the cost for a specific rate
2849  * \param dRate rate of a bit
2850  * \returns cost at the specific rate
2851  */
xGetICost(Double dRate) const2852 __inline Double TComTrQuant::xGetICost        ( Double                          dRate         ) const
2853 {
2854   return m_dLambda * dRate;
2855 }
2856 
2857 /** Get the cost of an equal probable bit
2858  * \returns cost of equal probable bit
2859  */
xGetIEPRate() const2860 __inline Double TComTrQuant::xGetIEPRate      (                                               ) const
2861 {
2862   return 32768;
2863 }
2864 
2865 /** Context derivation process of coeff_abs_significant_flag
2866  * \param uiSigCoeffGroupFlag significance map of L1
2867  * \param uiBlkX column of current scan position
2868  * \param uiBlkY row of current scan position
2869  * \param uiLog2BlkSize log2 value of block size
2870  * \returns ctxInc for current scan position
2871  */
getSigCoeffGroupCtxInc(const UInt * uiSigCoeffGroupFlag,const UInt uiCGPosX,const UInt uiCGPosY,const UInt widthInGroups,const UInt heightInGroups)2872 UInt TComTrQuant::getSigCoeffGroupCtxInc  (const UInt*  uiSigCoeffGroupFlag,
2873                                            const UInt   uiCGPosX,
2874                                            const UInt   uiCGPosY,
2875                                            const UInt   widthInGroups,
2876                                            const UInt   heightInGroups)
2877 {
2878   UInt sigRight = 0;
2879   UInt sigLower = 0;
2880 
2881   if (uiCGPosX < (widthInGroups  - 1)) sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2882   if (uiCGPosY < (heightInGroups - 1)) sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2883 
2884   return ((sigRight + sigLower) != 0) ? 1 : 0;
2885 }
2886 
2887 
2888 /** set quantized matrix coefficient for encode
2889  * \param scalingList quantaized matrix address
2890  */
setScalingList(TComScalingList * scalingList,const ChromaFormat format)2891 Void TComTrQuant::setScalingList(TComScalingList *scalingList, const ChromaFormat format)
2892 {
2893   const Int minimumQp = 0;
2894   const Int maximumQp = SCALING_LIST_REM_NUM;
2895 
2896   for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2897   {
2898     for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2899     {
2900       for(Int qp = minimumQp; qp < maximumQp; qp++)
2901       {
2902         xSetScalingListEnc(scalingList,list,size,qp,format);
2903         xSetScalingListDec(scalingList,list,size,qp,format);
2904         setErrScaleCoeff(list,size,qp);
2905       }
2906     }
2907   }
2908 }
2909 /** set quantized matrix coefficient for decode
2910  * \param scalingList quantaized matrix address
2911  */
setScalingListDec(TComScalingList * scalingList,const ChromaFormat format)2912 Void TComTrQuant::setScalingListDec(TComScalingList *scalingList, const ChromaFormat format)
2913 {
2914   const Int minimumQp = 0;
2915   const Int maximumQp = SCALING_LIST_REM_NUM;
2916 
2917   for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2918   {
2919     for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2920     {
2921       for(Int qp = minimumQp; qp < maximumQp; qp++)
2922       {
2923         xSetScalingListDec(scalingList,list,size,qp,format);
2924       }
2925     }
2926   }
2927 }
2928 /** set error scale coefficients
2929  * \param list List ID
2930  * \param uiSize Size
2931  * \param uiQP Quantization parameter
2932  */
setErrScaleCoeff(UInt list,UInt size,Int qp)2933 Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp)
2934 {
2935   const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2936   const ChannelType channelType = ((list == 0) || (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
2937 
2938   const Int iTransformShift = getTransformShift(channelType, uiLog2TrSize);  // Represents scaling through forward transform
2939 
2940   UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2941   Int *piQuantcoeff;
2942   Double *pdErrScale;
2943   piQuantcoeff   = getQuantCoeff(list, qp,size);
2944   pdErrScale     = getErrScaleCoeff(list, size, qp);
2945 
2946   Double dErrScale = (Double)(1<<SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
2947   dErrScale = dErrScale*pow(2.0,(-2.0*iTransformShift));                     // Compensate for scaling through forward transform
2948 
2949   for(i=0;i<uiMaxNumCoeff;i++)
2950   {
2951     pdErrScale[i] =  dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2952   }
2953 
2954   getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2955 }
2956 
2957 /** set quantized matrix coefficient for encode
2958  * \param scalingList quantaized matrix address
2959  * \param listId List index
2960  * \param sizeId size index
2961  * \param uiQP Quantization parameter
2962  */
xSetScalingListEnc(TComScalingList * scalingList,UInt listId,UInt sizeId,Int qp,const ChromaFormat format)2963 Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2964 {
2965   UInt width  = g_scalingListSizeX[sizeId];
2966   UInt height = g_scalingListSizeX[sizeId];
2967   UInt ratio  = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2968   Int *quantcoeff;
2969   Int *coeff  = scalingList->getScalingListAddress(sizeId,listId);
2970   quantcoeff  = getQuantCoeff(listId, qp, sizeId);
2971 
2972   Int quantScales = g_quantScales[qp];
2973 
2974   processScalingListEnc(coeff,
2975                         quantcoeff,
2976                         (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
2977                         height, width, ratio,
2978                         min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
2979                         scalingList->getScalingListDC(sizeId,listId));
2980 }
2981 
2982 /** set quantized matrix coefficient for decode
2983  * \param scalingList quantaized matrix address
2984  * \param list List index
2985  * \param size size index
2986  * \param uiQP Quantization parameter
2987  */
xSetScalingListDec(TComScalingList * scalingList,UInt listId,UInt sizeId,Int qp,const ChromaFormat format)2988 Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2989 {
2990   UInt width  = g_scalingListSizeX[sizeId];
2991   UInt height = g_scalingListSizeX[sizeId];
2992   UInt ratio  = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2993   Int *dequantcoeff;
2994   Int *coeff  = scalingList->getScalingListAddress(sizeId,listId);
2995 
2996   dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2997 
2998   Int invQuantScale = g_invQuantScales[qp];
2999 
3000   processScalingListDec(coeff,
3001                         dequantcoeff,
3002                         invQuantScale,
3003                         height, width, ratio,
3004                         min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3005                         scalingList->getScalingListDC(sizeId,listId));
3006 }
3007 
3008 /** set flat matrix value to quantized coefficient
3009  */
setFlatScalingList(const ChromaFormat format)3010 Void TComTrQuant::setFlatScalingList(const ChromaFormat format)
3011 {
3012   const Int minimumQp = 0;
3013   const Int maximumQp = SCALING_LIST_REM_NUM;
3014 
3015   for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3016   {
3017     for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3018     {
3019       for(Int qp = minimumQp; qp < maximumQp; qp++)
3020       {
3021         xsetFlatScalingList(list,size,qp,format);
3022         setErrScaleCoeff(list,size,qp);
3023       }
3024     }
3025   }
3026 }
3027 
3028 /** set flat matrix value to quantized coefficient
3029  * \param list List ID
3030  * \param uiQP Quantization parameter
3031  * \param uiSize Size
3032  */
xsetFlatScalingList(UInt list,UInt size,Int qp,const ChromaFormat format)3033 Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp, const ChromaFormat format)
3034 {
3035   UInt i,num = g_scalingListSize[size];
3036   Int *quantcoeff;
3037   Int *dequantcoeff;
3038 
3039   Int quantScales    = g_quantScales   [qp];
3040   Int invQuantScales = g_invQuantScales[qp] << 4;
3041 
3042   quantcoeff   = getQuantCoeff(list, qp, size);
3043   dequantcoeff = getDequantCoeff(list, qp, size);
3044 
3045   for(i=0;i<num;i++)
3046   {
3047     *quantcoeff++ = quantScales;
3048     *dequantcoeff++ = invQuantScales;
3049   }
3050 }
3051 
3052 /** set quantized matrix coefficient for encode
3053  * \param coeff quantaized matrix address
3054  * \param quantcoeff quantaized matrix address
3055  * \param quantScales Q(QP%6)
3056  * \param height height
3057  * \param width width
3058  * \param ratio ratio for upscale
3059  * \param sizuNum matrix size
3060  * \param dc dc parameter
3061  */
processScalingListEnc(Int * coeff,Int * quantcoeff,Int quantScales,UInt height,UInt width,UInt ratio,Int sizuNum,UInt dc)3062 Void TComTrQuant::processScalingListEnc( Int *coeff, Int *quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3063 {
3064   for(UInt j=0;j<height;j++)
3065   {
3066     for(UInt i=0;i<width;i++)
3067     {
3068       quantcoeff[j*width + i] = quantScales / coeff[sizuNum * (j / ratio) + i / ratio];
3069     }
3070   }
3071 
3072   if(ratio > 1)
3073   {
3074     quantcoeff[0] = quantScales / dc;
3075   }
3076 }
3077 
3078 /** set quantized matrix coefficient for decode
3079  * \param coeff quantaized matrix address
3080  * \param dequantcoeff quantaized matrix address
3081  * \param invQuantScales IQ(QP%6))
3082  * \param height height
3083  * \param width width
3084  * \param ratio ratio for upscale
3085  * \param sizuNum matrix size
3086  * \param dc dc parameter
3087  */
processScalingListDec(Int * coeff,Int * dequantcoeff,Int invQuantScales,UInt height,UInt width,UInt ratio,Int sizuNum,UInt dc)3088 Void TComTrQuant::processScalingListDec( Int *coeff, Int *dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3089 {
3090   for(UInt j=0;j<height;j++)
3091   {
3092     for(UInt i=0;i<width;i++)
3093     {
3094       dequantcoeff[j*width + i] = invQuantScales * coeff[sizuNum * (j / ratio) + i / ratio];
3095     }
3096   }
3097 
3098   if(ratio > 1)
3099   {
3100     dequantcoeff[0] = invQuantScales * dc;
3101   }
3102 }
3103 
3104 /** initialization process of scaling list array
3105  */
initScalingList()3106 Void TComTrQuant::initScalingList()
3107 {
3108   for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3109   {
3110     for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3111     {
3112       for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3113       {
3114         m_quantCoef   [sizeId][listId][qp] = new Int    [g_scalingListSize[sizeId]];
3115         m_dequantCoef [sizeId][listId][qp] = new Int    [g_scalingListSize[sizeId]];
3116         m_errScale    [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
3117       } // listID loop
3118     }
3119   }
3120 }
3121 
3122 /** destroy quantization matrix array
3123  */
destroyScalingList()3124 Void TComTrQuant::destroyScalingList()
3125 {
3126   for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3127   {
3128     for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3129     {
3130       for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3131       {
3132         if(m_quantCoef   [sizeId][listId][qp]) delete [] m_quantCoef   [sizeId][listId][qp];
3133         if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
3134         if(m_errScale    [sizeId][listId][qp]) delete [] m_errScale    [sizeId][listId][qp];
3135       }
3136     }
3137   }
3138 }
3139 
transformSkipQuantOneSample(TComTU & rTu,const ComponentID compID,const Pel resiDiff,TCoeff * pcCoeff,const UInt uiPos,const QpParam & cQP,const Bool bUseHalfRoundingPoint)3140 Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const Pel resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
3141 {
3142         TComDataCU    *pcCU                           = rTu.getCU();
3143   const UInt           uiAbsPartIdx                   = rTu.GetAbsPartIdxTU();
3144   const TComRectangle &rect                           = rTu.getRect(compID);
3145   const UInt           uiWidth                        = rect.width;
3146   const UInt           uiHeight                       = rect.height;
3147   const Int            iTransformShift                = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3148   const Int            scalingListType                = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3149   const Bool           enableScalingLists             = getUseScalingList(uiWidth, uiHeight, true);
3150   const Int            defaultQuantisationCoefficient = g_quantScales[cQP.rem];
3151 
3152   assert( scalingListType < SCALING_LIST_NUM );
3153   const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
3154 
3155 
3156   /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
3157   * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
3158   * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
3159   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
3160   */
3161 
3162   const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
3163   // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
3164 
3165   const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
3166 
3167   TCoeff transformedCoefficient;
3168 
3169   // transform-skip
3170   if (iTransformShift >= 0)
3171   {
3172     transformedCoefficient = resiDiff << iTransformShift;
3173   }
3174   else // for very high bit depths
3175   {
3176     const Int iTrShiftNeg  = -iTransformShift;
3177     const Int offset       = 1 << (iTrShiftNeg - 1);
3178     transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
3179   }
3180 
3181   // quantization
3182   const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
3183 
3184   const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
3185 
3186   const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
3187 
3188   const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
3189 
3190   const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3191   const TCoeff entropyCodingMaximum =  (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3192   pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
3193 }
3194 
3195 
invTrSkipDeQuantOneSample(TComTU & rTu,ComponentID compID,TCoeff inSample,Pel & reconSample,const QpParam & cQP,UInt uiPos)3196 Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
3197 {
3198         TComDataCU    *pcCU               = rTu.getCU();
3199   const UInt           uiAbsPartIdx       = rTu.GetAbsPartIdxTU();
3200   const TComRectangle &rect               = rTu.getRect(compID);
3201   const UInt           uiWidth            = rect.width;
3202   const UInt           uiHeight           = rect.height;
3203   const Int            QP_per             = cQP.per;
3204   const Int            QP_rem             = cQP.rem;
3205   const Int            iTransformShift    = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3206   const Int            scalingListType    = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3207   const Bool           enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3208   const UInt           uiLog2TrSize       = rTu.GetEquivalentLog2TrSize(compID);
3209 
3210   assert( scalingListType < SCALING_LIST_NUM );
3211 
3212   const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
3213 
3214   const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3215   const TCoeff transformMaximum =  (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3216 
3217   // Dequantisation
3218 
3219   TCoeff dequantisedSample;
3220 
3221   if(enableScalingLists)
3222   {
3223     const UInt             dequantCoefBits     = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
3224     const UInt             targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
3225 
3226     const Intermediate_Int inputMinimum        = -(1 << (targetInputBitDepth - 1));
3227     const Intermediate_Int inputMaximum        =  (1 << (targetInputBitDepth - 1)) - 1;
3228 
3229     Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
3230 
3231     if(rightShift > 0)
3232     {
3233       const Intermediate_Int iAdd      = 1 << (rightShift - 1);
3234       const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3235       const Intermediate_Int iCoeffQ   = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
3236 
3237       dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3238     }
3239     else
3240     {
3241       const Int              leftShift = -rightShift;
3242       const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3243       const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
3244 
3245       dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3246     }
3247   }
3248   else
3249   {
3250     const Int scale     =  g_invQuantScales[QP_rem];
3251     const Int scaleBits =     (IQUANT_SHIFT + 1)   ;
3252 
3253     const UInt             targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
3254     const Intermediate_Int inputMinimum        = -(1 << (targetInputBitDepth - 1));
3255     const Intermediate_Int inputMaximum        =  (1 << (targetInputBitDepth - 1)) - 1;
3256 
3257     if (rightShift > 0)
3258     {
3259       const Intermediate_Int iAdd      = 1 << (rightShift - 1);
3260       const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3261       const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
3262 
3263       dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3264     }
3265     else
3266     {
3267       const Int              leftShift = -rightShift;
3268       const TCoeff           clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3269       const Intermediate_Int iCoeffQ   = (Intermediate_Int(clipQCoef) * scale) << leftShift;
3270 
3271       dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3272     }
3273   }
3274 
3275   // Inverse transform-skip
3276 
3277   if (iTransformShift >= 0)
3278   {
3279     const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
3280     reconSample =  Pel(( dequantisedSample + offset ) >> iTransformShift);
3281   }
3282   else //for very high bit depths
3283   {
3284     const Int iTrShiftNeg = -iTransformShift;
3285     reconSample = Pel(dequantisedSample << iTrShiftNeg);
3286   }
3287 }
3288 
3289 
crossComponentPrediction(TComTU & rTu,const ComponentID compID,const Pel * piResiL,const Pel * piResiC,Pel * piResiT,const Int width,const Int height,const Int strideL,const Int strideC,const Int strideT,const Bool reverse)3290 Void TComTrQuant::crossComponentPrediction(       TComTU      & rTu,
3291                                             const ComponentID   compID,
3292                                             const Pel         * piResiL,
3293                                             const Pel         * piResiC,
3294                                                   Pel         * piResiT,
3295                                             const Int           width,
3296                                             const Int           height,
3297                                             const Int           strideL,
3298                                             const Int           strideC,
3299                                             const Int           strideT,
3300                                             const Bool          reverse )
3301 {
3302   const Pel *pResiL = piResiL;
3303   const Pel *pResiC = piResiC;
3304         Pel *pResiT = piResiT;
3305 
3306   TComDataCU *pCU = rTu.getCU();
3307   const Char alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
3308   const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
3309 
3310   for( Int y = 0; y < height; y++ )
3311   {
3312     if (reverse)
3313     {
3314       for( Int x = 0; x < width; x++ )
3315       {
3316         pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
3317       }
3318     }
3319     else
3320     {
3321       for( Int x = 0; x < width; x++ )
3322       {
3323         pResiT[x] = pResiC[x] - (( alpha * rightShift(pResiL[x], diffBitDepth) ) >> 3);
3324       }
3325     }
3326 
3327     pResiL += strideL;
3328     pResiC += strideC;
3329     pResiT += strideT;
3330   }
3331 }
3332 
3333 //! \}
3334