1 /* The copyright in this software is being made available under the BSD
2 * License, included below. This software may be subject to other third party
3 * and contributor rights, including patent rights, and no such rights are
4 * granted under this license.
5 *
6 * Copyright (c) 2010-2014, ITU/ISO/IEC
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions are met:
11 *
12 * * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 * * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18 * be used to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31 * THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /** \file TComTrQuant.cpp
35 \brief transform and quantization class
36 */
37
38 #include <stdlib.h>
39 #include <math.h>
40 #include <limits>
41 #include <memory.h>
42 #include "TComTrQuant.h"
43 #include "TComPic.h"
44 #include "ContextTables.h"
45 #include "TComTU.h"
46 #include "Debug.h"
47
48 typedef struct
49 {
50 Int iNNZbeforePos0;
51 Double d64CodedLevelandDist; // distortion and level cost only
52 Double d64UncodedDist; // all zero coded block distortion
53 Double d64SigCost;
54 Double d64SigCost_0;
55 } coeffGroupRDStats;
56
57 //! \ingroup TLibCommon
58 //! \{
59
60 // ====================================================================================================================
61 // Constants
62 // ====================================================================================================================
63
64 #define RDOQ_CHROMA 1 ///< use of RDOQ in chroma
65
66
67 // ====================================================================================================================
68 // QpParam constructor
69 // ====================================================================================================================
70
QpParam(const Int qpy,const ChannelType chType,const Int qpBdOffset,const Int chromaQPOffset,const ChromaFormat chFmt)71 QpParam::QpParam(const Int qpy,
72 const ChannelType chType,
73 const Int qpBdOffset,
74 const Int chromaQPOffset,
75 const ChromaFormat chFmt )
76 {
77 Int baseQp;
78
79 if(isLuma(chType))
80 {
81 baseQp = qpy + qpBdOffset;
82 }
83 else
84 {
85 baseQp = Clip3( -qpBdOffset, (chromaQPMappingTableSize - 1), qpy + chromaQPOffset );
86
87 if(baseQp < 0)
88 {
89 baseQp = baseQp + qpBdOffset;
90 }
91 else
92 {
93 baseQp = getScaledChromaQP(baseQp, chFmt) + qpBdOffset;
94 }
95 }
96
97 Qp =baseQp;
98 per=baseQp/6;
99 rem=baseQp%6;
100 }
101
QpParam(const TComDataCU & cu,const ComponentID compID)102 QpParam::QpParam(const TComDataCU &cu, const ComponentID compID)
103 {
104 Int chromaQpOffset = 0;
105
106 if (isChroma(compID))
107 {
108 chromaQpOffset += cu.getSlice()->getPPS()->getQpOffset(compID);
109 chromaQpOffset += cu.getSlice()->getSliceChromaQpDelta(compID);
110
111 chromaQpOffset += cu.getSlice()->getPPS()->getChromaQpAdjTableAt(cu.getChromaQpAdj(0)).u.offset[Int(compID)-1];
112 }
113
114 *this = QpParam(cu.getQP( 0 ),
115 toChannelType(compID),
116 cu.getSlice()->getSPS()->getQpBDOffset(toChannelType(compID)),
117 chromaQpOffset,
118 cu.getPic()->getChromaFormat());
119 }
120
121
122 // ====================================================================================================================
123 // TComTrQuant class member functions
124 // ====================================================================================================================
125
TComTrQuant()126 TComTrQuant::TComTrQuant()
127 {
128 // allocate temporary buffers
129 m_plTempCoeff = new TCoeff[ MAX_CU_SIZE*MAX_CU_SIZE ];
130
131 // allocate bit estimation class (for RDOQ)
132 m_pcEstBitsSbac = new estBitsSbacStruct;
133 initScalingList();
134 }
135
~TComTrQuant()136 TComTrQuant::~TComTrQuant()
137 {
138 // delete temporary buffers
139 if ( m_plTempCoeff )
140 {
141 delete [] m_plTempCoeff;
142 m_plTempCoeff = NULL;
143 }
144
145 // delete bit estimation class
146 if ( m_pcEstBitsSbac )
147 {
148 delete m_pcEstBitsSbac;
149 }
150 destroyScalingList();
151 }
152
153 #if ADAPTIVE_QP_SELECTION
storeSliceQpNext(TComSlice * pcSlice)154 Void TComTrQuant::storeSliceQpNext(TComSlice* pcSlice)
155 {
156 // NOTE: does this work with negative QPs or when some blocks are transquant-bypass enabled?
157
158 Int qpBase = pcSlice->getSliceQpBase();
159 Int sliceQpused = pcSlice->getSliceQp();
160 Int sliceQpnext;
161 Double alpha = qpBase < 17 ? 0.5 : 1;
162
163 Int cnt=0;
164 for(Int u=1; u<=LEVEL_RANGE; u++)
165 {
166 cnt += m_sliceNsamples[u] ;
167 }
168
169 if( !m_useRDOQ )
170 {
171 sliceQpused = qpBase;
172 alpha = 0.5;
173 }
174
175 if( cnt > 120 )
176 {
177 Double sum = 0;
178 Int k = 0;
179 for(Int u=1; u<LEVEL_RANGE; u++)
180 {
181 sum += u*m_sliceSumC[u];
182 k += u*u*m_sliceNsamples[u];
183 }
184
185 Int v;
186 Double q[MAX_QP+1] ;
187 for(v=0; v<=MAX_QP; v++)
188 {
189 q[v] = (Double)(g_invQuantScales[v%6] * (1<<(v/6)))/64 ;
190 }
191
192 Double qnext = sum/k * q[sliceQpused] / (1<<ARL_C_PRECISION);
193
194 for(v=0; v<MAX_QP; v++)
195 {
196 if(qnext < alpha * q[v] + (1 - alpha) * q[v+1] )
197 {
198 break;
199 }
200 }
201 sliceQpnext = Clip3(sliceQpused - 3, sliceQpused + 3, v);
202 }
203 else
204 {
205 sliceQpnext = sliceQpused;
206 }
207
208 m_qpDelta[qpBase] = sliceQpnext - qpBase;
209 }
210
initSliceQpDelta()211 Void TComTrQuant::initSliceQpDelta()
212 {
213 for(Int qp=0; qp<=MAX_QP; qp++)
214 {
215 m_qpDelta[qp] = qp < 17 ? 0 : 1;
216 }
217 }
218
clearSliceARLCnt()219 Void TComTrQuant::clearSliceARLCnt()
220 {
221 memset(m_sliceSumC, 0, sizeof(Double)*(LEVEL_RANGE+1));
222 memset(m_sliceNsamples, 0, sizeof(Int)*(LEVEL_RANGE+1));
223 }
224 #endif
225
226
227
228 #if MATRIX_MULT
229 /** NxN forward transform (2D) using brute force matrix multiplication (3 nested loops)
230 * \param block pointer to input data (residual)
231 * \param coeff pointer to output data (transform coefficients)
232 * \param uiStride stride of input data
233 * \param uiTrSize transform size (uiTrSize x uiTrSize)
234 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
235 */
xTr(Int bitDepth,Pel * block,TCoeff * coeff,UInt uiStride,UInt uiTrSize,Bool useDST,const Int maxTrDynamicRange)236 Void xTr(Int bitDepth, Pel *block, TCoeff *coeff, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
237 {
238 UInt i,j,k;
239 TCoeff iSum;
240 TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
241 const TMatrixCoeff *iT;
242 UInt uiLog2TrSize = g_aucConvertToBit[ uiTrSize ] + 2;
243
244 if (uiTrSize==4)
245 {
246 iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_FORWARD][0] : g_aiT4[TRANSFORM_FORWARD][0]);
247 }
248 else if (uiTrSize==8)
249 {
250 iT = g_aiT8[TRANSFORM_FORWARD][0];
251 }
252 else if (uiTrSize==16)
253 {
254 iT = g_aiT16[TRANSFORM_FORWARD][0];
255 }
256 else if (uiTrSize==32)
257 {
258 iT = g_aiT32[TRANSFORM_FORWARD][0];
259 }
260 else
261 {
262 assert(0);
263 }
264
265 static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
266
267 const Int shift_1st = (uiLog2TrSize + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
268 const Int shift_2nd = uiLog2TrSize + TRANSFORM_MATRIX_SHIFT;
269 const Int add_1st = (shift_1st>0) ? (1<<(shift_1st-1)) : 0;
270 const Int add_2nd = 1<<(shift_2nd-1);
271
272 /* Horizontal transform */
273
274 for (i=0; i<uiTrSize; i++)
275 {
276 for (j=0; j<uiTrSize; j++)
277 {
278 iSum = 0;
279 for (k=0; k<uiTrSize; k++)
280 {
281 iSum += iT[i*uiTrSize+k]*block[j*uiStride+k];
282 }
283 tmp[i*uiTrSize+j] = (iSum + add_1st)>>shift_1st;
284 }
285 }
286
287 /* Vertical transform */
288 for (i=0; i<uiTrSize; i++)
289 {
290 for (j=0; j<uiTrSize; j++)
291 {
292 iSum = 0;
293 for (k=0; k<uiTrSize; k++)
294 {
295 iSum += iT[i*uiTrSize+k]*tmp[j*uiTrSize+k];
296 }
297 coeff[i*uiTrSize+j] = (iSum + add_2nd)>>shift_2nd;
298 }
299 }
300 }
301
302 /** NxN inverse transform (2D) using brute force matrix multiplication (3 nested loops)
303 * \param coeff pointer to input data (transform coefficients)
304 * \param block pointer to output data (residual)
305 * \param uiStride stride of output data
306 * \param uiTrSize transform size (uiTrSize x uiTrSize)
307 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
308 */
xITr(Int bitDepth,TCoeff * coeff,Pel * block,UInt uiStride,UInt uiTrSize,Bool useDST,const Int maxTrDynamicRange)309 Void xITr(Int bitDepth, TCoeff *coeff, Pel *block, UInt uiStride, UInt uiTrSize, Bool useDST, const Int maxTrDynamicRange)
310 {
311 UInt i,j,k;
312 TCoeff iSum;
313 TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
314 const TMatrixCoeff *iT;
315
316 if (uiTrSize==4)
317 {
318 iT = (useDST ? g_as_DST_MAT_4[TRANSFORM_INVERSE][0] : g_aiT4[TRANSFORM_INVERSE][0]);
319 }
320 else if (uiTrSize==8)
321 {
322 iT = g_aiT8[TRANSFORM_INVERSE][0];
323 }
324 else if (uiTrSize==16)
325 {
326 iT = g_aiT16[TRANSFORM_INVERSE][0];
327 }
328 else if (uiTrSize==32)
329 {
330 iT = g_aiT32[TRANSFORM_INVERSE][0];
331 }
332 else
333 {
334 assert(0);
335 }
336
337 static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
338
339 const Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
340 const Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
341 const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
342 const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
343 assert(shift_2nd>=0);
344 const Int add_1st = 1<<(shift_1st-1);
345 const Int add_2nd = (shift_2nd>0) ? (1<<(shift_2nd-1)) : 0;
346
347 /* Horizontal transform */
348 for (i=0; i<uiTrSize; i++)
349 {
350 for (j=0; j<uiTrSize; j++)
351 {
352 iSum = 0;
353 for (k=0; k<uiTrSize; k++)
354 {
355 iSum += iT[k*uiTrSize+i]*coeff[k*uiTrSize+j];
356 }
357
358 // Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
359 tmp[i*uiTrSize+j] = Clip3<TCoeff>(clipMinimum, clipMaximum, (iSum + add_1st)>>shift_1st);
360 }
361 }
362
363 /* Vertical transform */
364 for (i=0; i<uiTrSize; i++)
365 {
366 for (j=0; j<uiTrSize; j++)
367 {
368 iSum = 0;
369 for (k=0; k<uiTrSize; k++)
370 {
371 iSum += iT[k*uiTrSize+j]*tmp[i*uiTrSize+k];
372 }
373
374 block[i*uiStride+j] = Clip3<TCoeff>(std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max(), (iSum + add_2nd)>>shift_2nd);
375 }
376 }
377 }
378
379 #endif //MATRIX_MULT
380
381
382 /** 4x4 forward transform implemented using partial butterfly structure (1D)
383 * \param src input data (residual)
384 * \param dst output data (transform coefficients)
385 * \param shift specifies right shift after 1D transform
386 */
partialButterfly4(TCoeff * src,TCoeff * dst,Int shift,Int line)387 Void partialButterfly4(TCoeff *src, TCoeff *dst, Int shift, Int line)
388 {
389 Int j;
390 TCoeff E[2],O[2];
391 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
392
393 for (j=0; j<line; j++)
394 {
395 /* E and O */
396 E[0] = src[0] + src[3];
397 O[0] = src[0] - src[3];
398 E[1] = src[1] + src[2];
399 O[1] = src[1] - src[2];
400
401 dst[0] = (g_aiT4[TRANSFORM_FORWARD][0][0]*E[0] + g_aiT4[TRANSFORM_FORWARD][0][1]*E[1] + add)>>shift;
402 dst[2*line] = (g_aiT4[TRANSFORM_FORWARD][2][0]*E[0] + g_aiT4[TRANSFORM_FORWARD][2][1]*E[1] + add)>>shift;
403 dst[line] = (g_aiT4[TRANSFORM_FORWARD][1][0]*O[0] + g_aiT4[TRANSFORM_FORWARD][1][1]*O[1] + add)>>shift;
404 dst[3*line] = (g_aiT4[TRANSFORM_FORWARD][3][0]*O[0] + g_aiT4[TRANSFORM_FORWARD][3][1]*O[1] + add)>>shift;
405
406 src += 4;
407 dst ++;
408 }
409 }
410
411 // Fast DST Algorithm. Full matrix multiplication for DST and Fast DST algorithm
412 // give identical results
fastForwardDst(TCoeff * block,TCoeff * coeff,Int shift)413 Void fastForwardDst(TCoeff *block, TCoeff *coeff, Int shift) // input block, output coeff
414 {
415 Int i;
416 TCoeff c[4];
417 TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
418 for (i=0; i<4; i++)
419 {
420 // Intermediate Variables
421 c[0] = block[4*i+0];
422 c[1] = block[4*i+1];
423 c[2] = block[4*i+2];
424 c[3] = block[4*i+3];
425
426 for (Int row = 0; row < 4; row++)
427 {
428 TCoeff result = 0;
429 for (Int column = 0; column < 4; column++)
430 result += c[column] * g_as_DST_MAT_4[TRANSFORM_FORWARD][row][column]; // use the defined matrix, rather than hard-wired numbers
431
432 coeff[(row * 4) + i] = rightShift((result + rnd_factor), shift);
433 }
434 }
435 }
436
fastInverseDst(TCoeff * tmp,TCoeff * block,Int shift,const TCoeff outputMinimum,const TCoeff outputMaximum)437 Void fastInverseDst(TCoeff *tmp, TCoeff *block, Int shift, const TCoeff outputMinimum, const TCoeff outputMaximum) // input tmp, output block
438 {
439 Int i;
440 TCoeff c[4];
441 TCoeff rnd_factor = (shift > 0) ? (1<<(shift-1)) : 0;
442 for (i=0; i<4; i++)
443 {
444 // Intermediate Variables
445 c[0] = tmp[ i];
446 c[1] = tmp[4 +i];
447 c[2] = tmp[8 +i];
448 c[3] = tmp[12+i];
449
450 for (Int column = 0; column < 4; column++)
451 {
452 TCoeff &result = block[(i * 4) + column];
453
454 result = 0;
455 for (Int row = 0; row < 4; row++)
456 result += c[row] * g_as_DST_MAT_4[TRANSFORM_INVERSE][row][column]; // use the defined matrix, rather than hard-wired numbers
457
458 result = Clip3( outputMinimum, outputMaximum, rightShift((result + rnd_factor), shift));
459 }
460 }
461 }
462
463 /** 4x4 inverse transform implemented using partial butterfly structure (1D)
464 * \param src input data (transform coefficients)
465 * \param dst output data (residual)
466 * \param shift specifies right shift after 1D transform
467 */
partialButterflyInverse4(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)468 Void partialButterflyInverse4(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
469 {
470 Int j;
471 TCoeff E[2],O[2];
472 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
473
474 for (j=0; j<line; j++)
475 {
476 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
477 O[0] = g_aiT4[TRANSFORM_INVERSE][1][0]*src[line] + g_aiT4[TRANSFORM_INVERSE][3][0]*src[3*line];
478 O[1] = g_aiT4[TRANSFORM_INVERSE][1][1]*src[line] + g_aiT4[TRANSFORM_INVERSE][3][1]*src[3*line];
479 E[0] = g_aiT4[TRANSFORM_INVERSE][0][0]*src[0] + g_aiT4[TRANSFORM_INVERSE][2][0]*src[2*line];
480 E[1] = g_aiT4[TRANSFORM_INVERSE][0][1]*src[0] + g_aiT4[TRANSFORM_INVERSE][2][1]*src[2*line];
481
482 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
483 dst[0] = Clip3( outputMinimum, outputMaximum, (E[0] + O[0] + add)>>shift );
484 dst[1] = Clip3( outputMinimum, outputMaximum, (E[1] + O[1] + add)>>shift );
485 dst[2] = Clip3( outputMinimum, outputMaximum, (E[1] - O[1] + add)>>shift );
486 dst[3] = Clip3( outputMinimum, outputMaximum, (E[0] - O[0] + add)>>shift );
487
488 src ++;
489 dst += 4;
490 }
491 }
492
493 /** 8x8 forward transform implemented using partial butterfly structure (1D)
494 * \param src input data (residual)
495 * \param dst output data (transform coefficients)
496 * \param shift specifies right shift after 1D transform
497 */
partialButterfly8(TCoeff * src,TCoeff * dst,Int shift,Int line)498 Void partialButterfly8(TCoeff *src, TCoeff *dst, Int shift, Int line)
499 {
500 Int j,k;
501 TCoeff E[4],O[4];
502 TCoeff EE[2],EO[2];
503 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
504
505 for (j=0; j<line; j++)
506 {
507 /* E and O*/
508 for (k=0;k<4;k++)
509 {
510 E[k] = src[k] + src[7-k];
511 O[k] = src[k] - src[7-k];
512 }
513 /* EE and EO */
514 EE[0] = E[0] + E[3];
515 EO[0] = E[0] - E[3];
516 EE[1] = E[1] + E[2];
517 EO[1] = E[1] - E[2];
518
519 dst[0] = (g_aiT8[TRANSFORM_FORWARD][0][0]*EE[0] + g_aiT8[TRANSFORM_FORWARD][0][1]*EE[1] + add)>>shift;
520 dst[4*line] = (g_aiT8[TRANSFORM_FORWARD][4][0]*EE[0] + g_aiT8[TRANSFORM_FORWARD][4][1]*EE[1] + add)>>shift;
521 dst[2*line] = (g_aiT8[TRANSFORM_FORWARD][2][0]*EO[0] + g_aiT8[TRANSFORM_FORWARD][2][1]*EO[1] + add)>>shift;
522 dst[6*line] = (g_aiT8[TRANSFORM_FORWARD][6][0]*EO[0] + g_aiT8[TRANSFORM_FORWARD][6][1]*EO[1] + add)>>shift;
523
524 dst[line] = (g_aiT8[TRANSFORM_FORWARD][1][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][1][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][1][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][1][3]*O[3] + add)>>shift;
525 dst[3*line] = (g_aiT8[TRANSFORM_FORWARD][3][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][3][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][3][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][3][3]*O[3] + add)>>shift;
526 dst[5*line] = (g_aiT8[TRANSFORM_FORWARD][5][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][5][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][5][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][5][3]*O[3] + add)>>shift;
527 dst[7*line] = (g_aiT8[TRANSFORM_FORWARD][7][0]*O[0] + g_aiT8[TRANSFORM_FORWARD][7][1]*O[1] + g_aiT8[TRANSFORM_FORWARD][7][2]*O[2] + g_aiT8[TRANSFORM_FORWARD][7][3]*O[3] + add)>>shift;
528
529 src += 8;
530 dst ++;
531 }
532 }
533
534 /** 8x8 inverse transform implemented using partial butterfly structure (1D)
535 * \param src input data (transform coefficients)
536 * \param dst output data (residual)
537 * \param shift specifies right shift after 1D transform
538 */
partialButterflyInverse8(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)539 Void partialButterflyInverse8(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
540 {
541 Int j,k;
542 TCoeff E[4],O[4];
543 TCoeff EE[2],EO[2];
544 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
545
546 for (j=0; j<line; j++)
547 {
548 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
549 for (k=0;k<4;k++)
550 {
551 O[k] = g_aiT8[TRANSFORM_INVERSE][ 1][k]*src[line] + g_aiT8[TRANSFORM_INVERSE][ 3][k]*src[3*line] +
552 g_aiT8[TRANSFORM_INVERSE][ 5][k]*src[5*line] + g_aiT8[TRANSFORM_INVERSE][ 7][k]*src[7*line];
553 }
554
555 EO[0] = g_aiT8[TRANSFORM_INVERSE][2][0]*src[ 2*line ] + g_aiT8[TRANSFORM_INVERSE][6][0]*src[ 6*line ];
556 EO[1] = g_aiT8[TRANSFORM_INVERSE][2][1]*src[ 2*line ] + g_aiT8[TRANSFORM_INVERSE][6][1]*src[ 6*line ];
557 EE[0] = g_aiT8[TRANSFORM_INVERSE][0][0]*src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][0]*src[ 4*line ];
558 EE[1] = g_aiT8[TRANSFORM_INVERSE][0][1]*src[ 0 ] + g_aiT8[TRANSFORM_INVERSE][4][1]*src[ 4*line ];
559
560 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
561 E[0] = EE[0] + EO[0];
562 E[3] = EE[0] - EO[0];
563 E[1] = EE[1] + EO[1];
564 E[2] = EE[1] - EO[1];
565 for (k=0;k<4;k++)
566 {
567 dst[ k ] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
568 dst[ k+4 ] = Clip3( outputMinimum, outputMaximum, (E[3-k] - O[3-k] + add)>>shift );
569 }
570 src ++;
571 dst += 8;
572 }
573 }
574
575 /** 16x16 forward transform implemented using partial butterfly structure (1D)
576 * \param src input data (residual)
577 * \param dst output data (transform coefficients)
578 * \param shift specifies right shift after 1D transform
579 */
partialButterfly16(TCoeff * src,TCoeff * dst,Int shift,Int line)580 Void partialButterfly16(TCoeff *src, TCoeff *dst, Int shift, Int line)
581 {
582 Int j,k;
583 TCoeff E[8],O[8];
584 TCoeff EE[4],EO[4];
585 TCoeff EEE[2],EEO[2];
586 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
587
588 for (j=0; j<line; j++)
589 {
590 /* E and O*/
591 for (k=0;k<8;k++)
592 {
593 E[k] = src[k] + src[15-k];
594 O[k] = src[k] - src[15-k];
595 }
596 /* EE and EO */
597 for (k=0;k<4;k++)
598 {
599 EE[k] = E[k] + E[7-k];
600 EO[k] = E[k] - E[7-k];
601 }
602 /* EEE and EEO */
603 EEE[0] = EE[0] + EE[3];
604 EEO[0] = EE[0] - EE[3];
605 EEE[1] = EE[1] + EE[2];
606 EEO[1] = EE[1] - EE[2];
607
608 dst[ 0 ] = (g_aiT16[TRANSFORM_FORWARD][ 0][0]*EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 0][1]*EEE[1] + add)>>shift;
609 dst[ 8*line ] = (g_aiT16[TRANSFORM_FORWARD][ 8][0]*EEE[0] + g_aiT16[TRANSFORM_FORWARD][ 8][1]*EEE[1] + add)>>shift;
610 dst[ 4*line ] = (g_aiT16[TRANSFORM_FORWARD][ 4][0]*EEO[0] + g_aiT16[TRANSFORM_FORWARD][ 4][1]*EEO[1] + add)>>shift;
611 dst[ 12*line] = (g_aiT16[TRANSFORM_FORWARD][12][0]*EEO[0] + g_aiT16[TRANSFORM_FORWARD][12][1]*EEO[1] + add)>>shift;
612
613 for (k=2;k<16;k+=4)
614 {
615 dst[ k*line ] = (g_aiT16[TRANSFORM_FORWARD][k][0]*EO[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*EO[1] +
616 g_aiT16[TRANSFORM_FORWARD][k][2]*EO[2] + g_aiT16[TRANSFORM_FORWARD][k][3]*EO[3] + add)>>shift;
617 }
618
619 for (k=1;k<16;k+=2)
620 {
621 dst[ k*line ] = (g_aiT16[TRANSFORM_FORWARD][k][0]*O[0] + g_aiT16[TRANSFORM_FORWARD][k][1]*O[1] +
622 g_aiT16[TRANSFORM_FORWARD][k][2]*O[2] + g_aiT16[TRANSFORM_FORWARD][k][3]*O[3] +
623 g_aiT16[TRANSFORM_FORWARD][k][4]*O[4] + g_aiT16[TRANSFORM_FORWARD][k][5]*O[5] +
624 g_aiT16[TRANSFORM_FORWARD][k][6]*O[6] + g_aiT16[TRANSFORM_FORWARD][k][7]*O[7] + add)>>shift;
625 }
626
627 src += 16;
628 dst ++;
629
630 }
631 }
632
633 /** 16x16 inverse transform implemented using partial butterfly structure (1D)
634 * \param src input data (transform coefficients)
635 * \param dst output data (residual)
636 * \param shift specifies right shift after 1D transform
637 */
partialButterflyInverse16(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)638 Void partialButterflyInverse16(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
639 {
640 Int j,k;
641 TCoeff E[8],O[8];
642 TCoeff EE[4],EO[4];
643 TCoeff EEE[2],EEO[2];
644 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
645
646 for (j=0; j<line; j++)
647 {
648 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
649 for (k=0;k<8;k++)
650 {
651 O[k] = g_aiT16[TRANSFORM_INVERSE][ 1][k]*src[ line] + g_aiT16[TRANSFORM_INVERSE][ 3][k]*src[ 3*line] +
652 g_aiT16[TRANSFORM_INVERSE][ 5][k]*src[ 5*line] + g_aiT16[TRANSFORM_INVERSE][ 7][k]*src[ 7*line] +
653 g_aiT16[TRANSFORM_INVERSE][ 9][k]*src[ 9*line] + g_aiT16[TRANSFORM_INVERSE][11][k]*src[11*line] +
654 g_aiT16[TRANSFORM_INVERSE][13][k]*src[13*line] + g_aiT16[TRANSFORM_INVERSE][15][k]*src[15*line];
655 }
656 for (k=0;k<4;k++)
657 {
658 EO[k] = g_aiT16[TRANSFORM_INVERSE][ 2][k]*src[ 2*line] + g_aiT16[TRANSFORM_INVERSE][ 6][k]*src[ 6*line] +
659 g_aiT16[TRANSFORM_INVERSE][10][k]*src[10*line] + g_aiT16[TRANSFORM_INVERSE][14][k]*src[14*line];
660 }
661 EEO[0] = g_aiT16[TRANSFORM_INVERSE][4][0]*src[ 4*line ] + g_aiT16[TRANSFORM_INVERSE][12][0]*src[ 12*line ];
662 EEE[0] = g_aiT16[TRANSFORM_INVERSE][0][0]*src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][0]*src[ 8*line ];
663 EEO[1] = g_aiT16[TRANSFORM_INVERSE][4][1]*src[ 4*line ] + g_aiT16[TRANSFORM_INVERSE][12][1]*src[ 12*line ];
664 EEE[1] = g_aiT16[TRANSFORM_INVERSE][0][1]*src[ 0 ] + g_aiT16[TRANSFORM_INVERSE][ 8][1]*src[ 8*line ];
665
666 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
667 for (k=0;k<2;k++)
668 {
669 EE[k] = EEE[k] + EEO[k];
670 EE[k+2] = EEE[1-k] - EEO[1-k];
671 }
672 for (k=0;k<4;k++)
673 {
674 E[k] = EE[k] + EO[k];
675 E[k+4] = EE[3-k] - EO[3-k];
676 }
677 for (k=0;k<8;k++)
678 {
679 dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
680 dst[k+8] = Clip3( outputMinimum, outputMaximum, (E[7-k] - O[7-k] + add)>>shift );
681 }
682 src ++;
683 dst += 16;
684 }
685 }
686
687 /** 32x32 forward transform implemented using partial butterfly structure (1D)
688 * \param src input data (residual)
689 * \param dst output data (transform coefficients)
690 * \param shift specifies right shift after 1D transform
691 */
partialButterfly32(TCoeff * src,TCoeff * dst,Int shift,Int line)692 Void partialButterfly32(TCoeff *src, TCoeff *dst, Int shift, Int line)
693 {
694 Int j,k;
695 TCoeff E[16],O[16];
696 TCoeff EE[8],EO[8];
697 TCoeff EEE[4],EEO[4];
698 TCoeff EEEE[2],EEEO[2];
699 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
700
701 for (j=0; j<line; j++)
702 {
703 /* E and O*/
704 for (k=0;k<16;k++)
705 {
706 E[k] = src[k] + src[31-k];
707 O[k] = src[k] - src[31-k];
708 }
709 /* EE and EO */
710 for (k=0;k<8;k++)
711 {
712 EE[k] = E[k] + E[15-k];
713 EO[k] = E[k] - E[15-k];
714 }
715 /* EEE and EEO */
716 for (k=0;k<4;k++)
717 {
718 EEE[k] = EE[k] + EE[7-k];
719 EEO[k] = EE[k] - EE[7-k];
720 }
721 /* EEEE and EEEO */
722 EEEE[0] = EEE[0] + EEE[3];
723 EEEO[0] = EEE[0] - EEE[3];
724 EEEE[1] = EEE[1] + EEE[2];
725 EEEO[1] = EEE[1] - EEE[2];
726
727 dst[ 0 ] = (g_aiT32[TRANSFORM_FORWARD][ 0][0]*EEEE[0] + g_aiT32[TRANSFORM_FORWARD][ 0][1]*EEEE[1] + add)>>shift;
728 dst[ 16*line ] = (g_aiT32[TRANSFORM_FORWARD][16][0]*EEEE[0] + g_aiT32[TRANSFORM_FORWARD][16][1]*EEEE[1] + add)>>shift;
729 dst[ 8*line ] = (g_aiT32[TRANSFORM_FORWARD][ 8][0]*EEEO[0] + g_aiT32[TRANSFORM_FORWARD][ 8][1]*EEEO[1] + add)>>shift;
730 dst[ 24*line ] = (g_aiT32[TRANSFORM_FORWARD][24][0]*EEEO[0] + g_aiT32[TRANSFORM_FORWARD][24][1]*EEEO[1] + add)>>shift;
731 for (k=4;k<32;k+=8)
732 {
733 dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][0]*EEO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EEO[1] +
734 g_aiT32[TRANSFORM_FORWARD][k][2]*EEO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]*EEO[3] + add)>>shift;
735 }
736 for (k=2;k<32;k+=4)
737 {
738 dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][0]*EO[0] + g_aiT32[TRANSFORM_FORWARD][k][1]*EO[1] +
739 g_aiT32[TRANSFORM_FORWARD][k][2]*EO[2] + g_aiT32[TRANSFORM_FORWARD][k][3]*EO[3] +
740 g_aiT32[TRANSFORM_FORWARD][k][4]*EO[4] + g_aiT32[TRANSFORM_FORWARD][k][5]*EO[5] +
741 g_aiT32[TRANSFORM_FORWARD][k][6]*EO[6] + g_aiT32[TRANSFORM_FORWARD][k][7]*EO[7] + add)>>shift;
742 }
743 for (k=1;k<32;k+=2)
744 {
745 dst[ k*line ] = (g_aiT32[TRANSFORM_FORWARD][k][ 0]*O[ 0] + g_aiT32[TRANSFORM_FORWARD][k][ 1]*O[ 1] +
746 g_aiT32[TRANSFORM_FORWARD][k][ 2]*O[ 2] + g_aiT32[TRANSFORM_FORWARD][k][ 3]*O[ 3] +
747 g_aiT32[TRANSFORM_FORWARD][k][ 4]*O[ 4] + g_aiT32[TRANSFORM_FORWARD][k][ 5]*O[ 5] +
748 g_aiT32[TRANSFORM_FORWARD][k][ 6]*O[ 6] + g_aiT32[TRANSFORM_FORWARD][k][ 7]*O[ 7] +
749 g_aiT32[TRANSFORM_FORWARD][k][ 8]*O[ 8] + g_aiT32[TRANSFORM_FORWARD][k][ 9]*O[ 9] +
750 g_aiT32[TRANSFORM_FORWARD][k][10]*O[10] + g_aiT32[TRANSFORM_FORWARD][k][11]*O[11] +
751 g_aiT32[TRANSFORM_FORWARD][k][12]*O[12] + g_aiT32[TRANSFORM_FORWARD][k][13]*O[13] +
752 g_aiT32[TRANSFORM_FORWARD][k][14]*O[14] + g_aiT32[TRANSFORM_FORWARD][k][15]*O[15] + add)>>shift;
753 }
754
755 src += 32;
756 dst ++;
757 }
758 }
759
760 /** 32x32 inverse transform implemented using partial butterfly structure (1D)
761 * \param src input data (transform coefficients)
762 * \param dst output data (residual)
763 * \param shift specifies right shift after 1D transform
764 */
partialButterflyInverse32(TCoeff * src,TCoeff * dst,Int shift,Int line,const TCoeff outputMinimum,const TCoeff outputMaximum)765 Void partialButterflyInverse32(TCoeff *src, TCoeff *dst, Int shift, Int line, const TCoeff outputMinimum, const TCoeff outputMaximum)
766 {
767 Int j,k;
768 TCoeff E[16],O[16];
769 TCoeff EE[8],EO[8];
770 TCoeff EEE[4],EEO[4];
771 TCoeff EEEE[2],EEEO[2];
772 TCoeff add = (shift > 0) ? (1<<(shift-1)) : 0;
773
774 for (j=0; j<line; j++)
775 {
776 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
777 for (k=0;k<16;k++)
778 {
779 O[k] = g_aiT32[TRANSFORM_INVERSE][ 1][k]*src[ line ] + g_aiT32[TRANSFORM_INVERSE][ 3][k]*src[ 3*line ] +
780 g_aiT32[TRANSFORM_INVERSE][ 5][k]*src[ 5*line ] + g_aiT32[TRANSFORM_INVERSE][ 7][k]*src[ 7*line ] +
781 g_aiT32[TRANSFORM_INVERSE][ 9][k]*src[ 9*line ] + g_aiT32[TRANSFORM_INVERSE][11][k]*src[ 11*line ] +
782 g_aiT32[TRANSFORM_INVERSE][13][k]*src[ 13*line ] + g_aiT32[TRANSFORM_INVERSE][15][k]*src[ 15*line ] +
783 g_aiT32[TRANSFORM_INVERSE][17][k]*src[ 17*line ] + g_aiT32[TRANSFORM_INVERSE][19][k]*src[ 19*line ] +
784 g_aiT32[TRANSFORM_INVERSE][21][k]*src[ 21*line ] + g_aiT32[TRANSFORM_INVERSE][23][k]*src[ 23*line ] +
785 g_aiT32[TRANSFORM_INVERSE][25][k]*src[ 25*line ] + g_aiT32[TRANSFORM_INVERSE][27][k]*src[ 27*line ] +
786 g_aiT32[TRANSFORM_INVERSE][29][k]*src[ 29*line ] + g_aiT32[TRANSFORM_INVERSE][31][k]*src[ 31*line ];
787 }
788 for (k=0;k<8;k++)
789 {
790 EO[k] = g_aiT32[TRANSFORM_INVERSE][ 2][k]*src[ 2*line ] + g_aiT32[TRANSFORM_INVERSE][ 6][k]*src[ 6*line ] +
791 g_aiT32[TRANSFORM_INVERSE][10][k]*src[ 10*line ] + g_aiT32[TRANSFORM_INVERSE][14][k]*src[ 14*line ] +
792 g_aiT32[TRANSFORM_INVERSE][18][k]*src[ 18*line ] + g_aiT32[TRANSFORM_INVERSE][22][k]*src[ 22*line ] +
793 g_aiT32[TRANSFORM_INVERSE][26][k]*src[ 26*line ] + g_aiT32[TRANSFORM_INVERSE][30][k]*src[ 30*line ];
794 }
795 for (k=0;k<4;k++)
796 {
797 EEO[k] = g_aiT32[TRANSFORM_INVERSE][ 4][k]*src[ 4*line ] + g_aiT32[TRANSFORM_INVERSE][12][k]*src[ 12*line ] +
798 g_aiT32[TRANSFORM_INVERSE][20][k]*src[ 20*line ] + g_aiT32[TRANSFORM_INVERSE][28][k]*src[ 28*line ];
799 }
800 EEEO[0] = g_aiT32[TRANSFORM_INVERSE][8][0]*src[ 8*line ] + g_aiT32[TRANSFORM_INVERSE][24][0]*src[ 24*line ];
801 EEEO[1] = g_aiT32[TRANSFORM_INVERSE][8][1]*src[ 8*line ] + g_aiT32[TRANSFORM_INVERSE][24][1]*src[ 24*line ];
802 EEEE[0] = g_aiT32[TRANSFORM_INVERSE][0][0]*src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][0]*src[ 16*line ];
803 EEEE[1] = g_aiT32[TRANSFORM_INVERSE][0][1]*src[ 0 ] + g_aiT32[TRANSFORM_INVERSE][16][1]*src[ 16*line ];
804
805 /* Combining even and odd terms at each hierarchy levels to calculate the final spatial domain vector */
806 EEE[0] = EEEE[0] + EEEO[0];
807 EEE[3] = EEEE[0] - EEEO[0];
808 EEE[1] = EEEE[1] + EEEO[1];
809 EEE[2] = EEEE[1] - EEEO[1];
810 for (k=0;k<4;k++)
811 {
812 EE[k] = EEE[k] + EEO[k];
813 EE[k+4] = EEE[3-k] - EEO[3-k];
814 }
815 for (k=0;k<8;k++)
816 {
817 E[k] = EE[k] + EO[k];
818 E[k+8] = EE[7-k] - EO[7-k];
819 }
820 for (k=0;k<16;k++)
821 {
822 dst[k] = Clip3( outputMinimum, outputMaximum, (E[k] + O[k] + add)>>shift );
823 dst[k+16] = Clip3( outputMinimum, outputMaximum, (E[15-k] - O[15-k] + add)>>shift );
824 }
825 src ++;
826 dst += 32;
827 }
828 }
829
830 /** MxN forward transform (2D)
831 * \param block input data (residual)
832 * \param coeff output data (transform coefficients)
833 * \param iWidth input data (width of transform)
834 * \param iHeight input data (height of transform)
835 */
xTrMxN(Int bitDepth,TCoeff * block,TCoeff * coeff,Int iWidth,Int iHeight,Bool useDST,const Int maxTrDynamicRange)836 Void xTrMxN(Int bitDepth, TCoeff *block, TCoeff *coeff, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
837 {
838 static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
839
840 const Int shift_1st = ((g_aucConvertToBit[iWidth] + 2) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxTrDynamicRange;
841 const Int shift_2nd = (g_aucConvertToBit[iHeight] + 2) + TRANSFORM_MATRIX_SHIFT;
842
843 assert(shift_1st >= 0);
844 assert(shift_2nd >= 0);
845
846 TCoeff tmp[ MAX_TU_SIZE * MAX_TU_SIZE ];
847
848 switch (iWidth)
849 {
850 case 4:
851 {
852 if ((iHeight == 4) && useDST) // Check for DCT or DST
853 {
854 fastForwardDst( block, tmp, shift_1st );
855 }
856 else partialButterfly4 ( block, tmp, shift_1st, iHeight );
857 }
858 break;
859
860 case 8: partialButterfly8 ( block, tmp, shift_1st, iHeight ); break;
861 case 16: partialButterfly16( block, tmp, shift_1st, iHeight ); break;
862 case 32: partialButterfly32( block, tmp, shift_1st, iHeight ); break;
863 default:
864 assert(0); exit (1); break;
865 }
866
867 switch (iHeight)
868 {
869 case 4:
870 {
871 if ((iWidth == 4) && useDST) // Check for DCT or DST
872 {
873 fastForwardDst( tmp, coeff, shift_2nd );
874 }
875 else partialButterfly4 ( tmp, coeff, shift_2nd, iWidth );
876 }
877 break;
878
879 case 8: partialButterfly8 ( tmp, coeff, shift_2nd, iWidth ); break;
880 case 16: partialButterfly16( tmp, coeff, shift_2nd, iWidth ); break;
881 case 32: partialButterfly32( tmp, coeff, shift_2nd, iWidth ); break;
882 default:
883 assert(0); exit (1); break;
884 }
885 }
886
887
888 /** MxN inverse transform (2D)
889 * \param coeff input data (transform coefficients)
890 * \param block output data (residual)
891 * \param iWidth input data (width of transform)
892 * \param iHeight input data (height of transform)
893 */
xITrMxN(Int bitDepth,TCoeff * coeff,TCoeff * block,Int iWidth,Int iHeight,Bool useDST,const Int maxTrDynamicRange)894 Void xITrMxN(Int bitDepth, TCoeff *coeff, TCoeff *block, Int iWidth, Int iHeight, Bool useDST, const Int maxTrDynamicRange)
895 {
896 static const Int TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_INVERSE];
897
898 Int shift_1st = TRANSFORM_MATRIX_SHIFT + 1; //1 has been added to shift_1st at the expense of shift_2nd
899 Int shift_2nd = (TRANSFORM_MATRIX_SHIFT + maxTrDynamicRange - 1) - bitDepth;
900 const TCoeff clipMinimum = -(1 << maxTrDynamicRange);
901 const TCoeff clipMaximum = (1 << maxTrDynamicRange) - 1;
902
903 assert(shift_1st >= 0);
904 assert(shift_2nd >= 0);
905
906 TCoeff tmp[MAX_TU_SIZE * MAX_TU_SIZE];
907
908 switch (iHeight)
909 {
910 case 4:
911 {
912 if ((iWidth == 4) && useDST) // Check for DCT or DST
913 {
914 fastInverseDst( coeff, tmp, shift_1st, clipMinimum, clipMaximum);
915 }
916 else partialButterflyInverse4 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum);
917 }
918 break;
919
920 case 8: partialButterflyInverse8 ( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
921 case 16: partialButterflyInverse16( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
922 case 32: partialButterflyInverse32( coeff, tmp, shift_1st, iWidth, clipMinimum, clipMaximum); break;
923
924 default:
925 assert(0); exit (1); break;
926 }
927
928 switch (iWidth)
929 {
930 // Clipping here is not in the standard, but is used to protect the "Pel" data type into which the inverse-transformed samples will be copied
931 case 4:
932 {
933 if ((iHeight == 4) && useDST) // Check for DCT or DST
934 {
935 fastInverseDst( tmp, block, shift_2nd, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max() );
936 }
937 else partialButterflyInverse4 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max());
938 }
939 break;
940
941 case 8: partialButterflyInverse8 ( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
942 case 16: partialButterflyInverse16( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
943 case 32: partialButterflyInverse32( tmp, block, shift_2nd, iHeight, std::numeric_limits<Pel>::min(), std::numeric_limits<Pel>::max()); break;
944
945 default:
946 assert(0); exit (1); break;
947 }
948 }
949
950
951 // To minimize the distortion only. No rate is considered.
signBitHidingHDQ(const ComponentID compID,TCoeff * pQCoef,TCoeff * pCoef,TCoeff * deltaU,const TUEntropyCodingParameters & codingParameters)952 Void TComTrQuant::signBitHidingHDQ( const ComponentID compID, TCoeff* pQCoef, TCoeff* pCoef, TCoeff* deltaU, const TUEntropyCodingParameters &codingParameters )
953 {
954 const UInt width = codingParameters.widthInGroups << MLS_CG_LOG2_WIDTH;
955 const UInt height = codingParameters.heightInGroups << MLS_CG_LOG2_HEIGHT;
956 const UInt groupSize = 1 << MLS_CG_SIZE;
957
958 const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
959 const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
960
961 Int lastCG = -1;
962 Int absSum = 0 ;
963 Int n ;
964
965 for( Int subSet = (width*height-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
966 {
967 Int subPos = subSet << MLS_CG_SIZE;
968 Int firstNZPosInCG=groupSize , lastNZPosInCG=-1 ;
969 absSum = 0 ;
970
971 for(n = groupSize-1; n >= 0; --n )
972 {
973 if( pQCoef[ codingParameters.scan[ n + subPos ]] )
974 {
975 lastNZPosInCG = n;
976 break;
977 }
978 }
979
980 for(n = 0; n <groupSize; n++ )
981 {
982 if( pQCoef[ codingParameters.scan[ n + subPos ]] )
983 {
984 firstNZPosInCG = n;
985 break;
986 }
987 }
988
989 for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
990 {
991 absSum += Int(pQCoef[ codingParameters.scan[ n + subPos ]]);
992 }
993
994 if(lastNZPosInCG>=0 && lastCG==-1)
995 {
996 lastCG = 1 ;
997 }
998
999 if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1000 {
1001 UInt signbit = (pQCoef[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1) ;
1002 if( signbit!=(absSum&0x1) ) //compare signbit with sum_parity
1003 {
1004 TCoeff curCost = std::numeric_limits<TCoeff>::max();
1005 TCoeff minCostInc = std::numeric_limits<TCoeff>::max();
1006 Int minPos =-1, finalChange=0, curChange=0;
1007
1008 for( n = (lastCG==1?lastNZPosInCG:groupSize-1) ; n >= 0; --n )
1009 {
1010 UInt blkPos = codingParameters.scan[ n+subPos ];
1011 if(pQCoef[ blkPos ] != 0 )
1012 {
1013 if(deltaU[blkPos]>0)
1014 {
1015 curCost = - deltaU[blkPos];
1016 curChange=1 ;
1017 }
1018 else
1019 {
1020 //curChange =-1;
1021 if(n==firstNZPosInCG && abs(pQCoef[blkPos])==1)
1022 {
1023 curCost = std::numeric_limits<TCoeff>::max();
1024 }
1025 else
1026 {
1027 curCost = deltaU[blkPos];
1028 curChange =-1;
1029 }
1030 }
1031 }
1032 else
1033 {
1034 if(n<firstNZPosInCG)
1035 {
1036 UInt thisSignBit = (pCoef[blkPos]>=0?0:1);
1037 if(thisSignBit != signbit )
1038 {
1039 curCost = std::numeric_limits<TCoeff>::max();
1040 }
1041 else
1042 {
1043 curCost = - (deltaU[blkPos]) ;
1044 curChange = 1 ;
1045 }
1046 }
1047 else
1048 {
1049 curCost = - (deltaU[blkPos]) ;
1050 curChange = 1 ;
1051 }
1052 }
1053
1054 if( curCost<minCostInc)
1055 {
1056 minCostInc = curCost ;
1057 finalChange = curChange ;
1058 minPos = blkPos ;
1059 }
1060 } //CG loop
1061
1062 if(pQCoef[minPos] == entropyCodingMaximum || pQCoef[minPos] == entropyCodingMinimum)
1063 {
1064 finalChange = -1;
1065 }
1066
1067 if(pCoef[minPos]>=0)
1068 {
1069 pQCoef[minPos] += finalChange ;
1070 }
1071 else
1072 {
1073 pQCoef[minPos] -= finalChange ;
1074 }
1075 } // Hide
1076 }
1077 if(lastCG==1)
1078 {
1079 lastCG=0 ;
1080 }
1081 } // TU loop
1082
1083 return;
1084 }
1085
1086
xQuant(TComTU & rTu,TCoeff * pSrc,TCoeff * pDes,TCoeff * pArlDes,TCoeff & uiAbsSum,const ComponentID compID,const QpParam & cQP)1087 Void TComTrQuant::xQuant( TComTU &rTu,
1088 TCoeff * pSrc,
1089 TCoeff * pDes,
1090 #if ADAPTIVE_QP_SELECTION
1091 TCoeff *pArlDes,
1092 #endif
1093 TCoeff &uiAbsSum,
1094 const ComponentID compID,
1095 const QpParam &cQP )
1096 {
1097 const TComRectangle &rect = rTu.getRect(compID);
1098 const UInt uiWidth = rect.width;
1099 const UInt uiHeight = rect.height;
1100 TComDataCU* pcCU = rTu.getCU();
1101 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1102
1103 TCoeff* piCoef = pSrc;
1104 TCoeff* piQCoef = pDes;
1105 #if ADAPTIVE_QP_SELECTION
1106 TCoeff* piArlCCoef = pArlDes;
1107 #endif
1108
1109 const Bool useTransformSkip = pcCU->getTransformSkip(uiAbsPartIdx, compID);
1110
1111 Bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_useRDOQ;
1112 if ( useRDOQ && (isLuma(compID) || RDOQ_CHROMA) )
1113 {
1114 #if ADAPTIVE_QP_SELECTION
1115 xRateDistOptQuant( rTu, piCoef, pDes, pArlDes, uiAbsSum, compID, cQP );
1116 #else
1117 xRateDistOptQuant( rTu, piCoef, pDes, uiAbsSum, compID, cQP );
1118 #endif
1119 }
1120 else
1121 {
1122 TUEntropyCodingParameters codingParameters;
1123 getTUEntropyCodingParameters(codingParameters, rTu, compID);
1124
1125 const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1126 const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1127
1128 TCoeff deltaU[MAX_TU_SIZE * MAX_TU_SIZE];
1129
1130 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1131
1132 Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1133 assert(scalingListType < SCALING_LIST_NUM);
1134 Int *piQuantCoeff = getQuantCoeff(scalingListType, cQP.rem, uiLog2TrSize-2);
1135
1136 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1137 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
1138
1139 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1140 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1141 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1142 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1143 */
1144
1145 // Represents scaling through forward transform
1146 Int iTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1147 if (useTransformSkip && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
1148 {
1149 iTransformShift = std::max<Int>(0, iTransformShift);
1150 }
1151
1152 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
1153 // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
1154
1155 #if ADAPTIVE_QP_SELECTION
1156 Int iQBitsC = MAX_INT;
1157 Int iAddC = MAX_INT;
1158
1159 if (m_bUseAdaptQpSelect)
1160 {
1161 iQBitsC = iQBits - ARL_C_PRECISION;
1162 iAddC = 1 << (iQBitsC-1);
1163 }
1164 #endif
1165
1166 const Int iAdd = (pcCU->getSlice()->getSliceType()==I_SLICE ? 171 : 85) << (iQBits-9);
1167 const Int qBits8 = iQBits - 8;
1168
1169 for( Int uiBlockPos = 0; uiBlockPos < uiWidth*uiHeight; uiBlockPos++ )
1170 {
1171 const TCoeff iLevel = piCoef[uiBlockPos];
1172 const TCoeff iSign = (iLevel < 0 ? -1: 1);
1173
1174 const Int64 tmpLevel = (Int64)abs(iLevel) * (enableScalingLists ? piQuantCoeff[uiBlockPos] : defaultQuantisationCoefficient);
1175
1176 #if ADAPTIVE_QP_SELECTION
1177 if( m_bUseAdaptQpSelect )
1178 {
1179 piArlCCoef[uiBlockPos] = (TCoeff)((tmpLevel + iAddC ) >> iQBitsC);
1180 }
1181 #endif
1182
1183 const TCoeff quantisedMagnitude = TCoeff((tmpLevel + iAdd ) >> iQBits);
1184 deltaU[uiBlockPos] = (TCoeff)((tmpLevel - (quantisedMagnitude<<iQBits) )>> qBits8);
1185
1186 uiAbsSum += quantisedMagnitude;
1187 const TCoeff quantisedCoefficient = quantisedMagnitude * iSign;
1188
1189 piQCoef[uiBlockPos] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
1190 } // for n
1191
1192 if( pcCU->getSlice()->getPPS()->getSignHideFlag() )
1193 {
1194 if(uiAbsSum >= 2) //this prevents TUs with only one coefficient of value 1 from being tested
1195 {
1196 signBitHidingHDQ( compID, piQCoef, piCoef, deltaU, codingParameters ) ;
1197 }
1198 }
1199 } //if RDOQ
1200 //return;
1201 }
1202
xDeQuant(TComTU & rTu,const TCoeff * pSrc,TCoeff * pDes,const ComponentID compID,const QpParam & cQP)1203 Void TComTrQuant::xDeQuant( TComTU &rTu,
1204 const TCoeff * pSrc,
1205 TCoeff * pDes,
1206 const ComponentID compID,
1207 const QpParam &cQP )
1208 {
1209 assert(compID<MAX_NUM_COMPONENT);
1210
1211 TComDataCU *pcCU = rTu.getCU();
1212 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1213 const TComRectangle &rect = rTu.getRect(compID);
1214 const UInt uiWidth = rect.width;
1215 const UInt uiHeight = rect.height;
1216 const TCoeff *const piQCoef = pSrc;
1217 TCoeff *const piCoef = pDes;
1218 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1219 const UInt numSamplesInBlock = uiWidth*uiHeight;
1220 const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
1221 const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
1222 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
1223 const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
1224
1225 assert (scalingListType < SCALING_LIST_NUM);
1226 assert ( uiWidth <= m_uiMaxTrSize );
1227
1228 // Represents scaling through forward transform
1229 const Bool bClipTransformShiftTo0 = (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1230 const Int originalTransformShift = getTransformShift(toChannelType(compID), uiLog2TrSize);
1231 const Int iTransformShift = bClipTransformShiftTo0 ? std::max<Int>(0, originalTransformShift) : originalTransformShift;
1232
1233 const Int QP_per = cQP.per;
1234 const Int QP_rem = cQP.rem;
1235
1236 const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
1237
1238 if(enableScalingLists)
1239 {
1240 //from the dequantisation equation:
1241 //iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[deQuantIdx]) + iAdd ) >> rightShift
1242 //(sizeof(Intermediate_Int) * 8) = inputBitDepth + dequantCoefBits - rightShift
1243 const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
1244 const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
1245
1246 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1247 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1248
1249 Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
1250
1251 if(rightShift > 0)
1252 {
1253 const Intermediate_Int iAdd = 1 << (rightShift - 1);
1254
1255 for( Int n = 0; n < numSamplesInBlock; n++ )
1256 {
1257 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1258 const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[n]) + iAdd ) >> rightShift;
1259
1260 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1261 }
1262 }
1263 else
1264 {
1265 const Int leftShift = -rightShift;
1266
1267 for( Int n = 0; n < numSamplesInBlock; n++ )
1268 {
1269 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1270 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[n]) << leftShift;
1271
1272 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1273 }
1274 }
1275 }
1276 else
1277 {
1278 const Int scale = g_invQuantScales[QP_rem];
1279 const Int scaleBits = (IQUANT_SHIFT + 1) ;
1280
1281 //from the dequantisation equation:
1282 //iCoeffQ = Intermediate_Int((Int64(clipQCoef) * scale + iAdd) >> rightShift);
1283 //(sizeof(Intermediate_Int) * 8) = inputBitDepth + scaleBits - rightShift
1284 const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
1285 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
1286 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
1287
1288 if (rightShift > 0)
1289 {
1290 const Intermediate_Int iAdd = 1 << (rightShift - 1);
1291
1292 for( Int n = 0; n < numSamplesInBlock; n++ )
1293 {
1294 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1295 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
1296
1297 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1298 }
1299 }
1300 else
1301 {
1302 const Int leftShift = -rightShift;
1303
1304 for( Int n = 0; n < numSamplesInBlock; n++ )
1305 {
1306 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, piQCoef[n]));
1307 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
1308
1309 piCoef[n] = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
1310 }
1311 }
1312 }
1313 }
1314
1315
init(UInt uiMaxTrSize,Bool bUseRDOQ,Bool bUseRDOQTS,Bool bEnc,Bool useTransformSkipFast,Bool bUseAdaptQpSelect)1316 Void TComTrQuant::init( UInt uiMaxTrSize,
1317 Bool bUseRDOQ,
1318 Bool bUseRDOQTS,
1319 Bool bEnc,
1320 Bool useTransformSkipFast
1321 #if ADAPTIVE_QP_SELECTION
1322 , Bool bUseAdaptQpSelect
1323 #endif
1324 )
1325 {
1326 m_uiMaxTrSize = uiMaxTrSize;
1327 m_bEnc = bEnc;
1328 m_useRDOQ = bUseRDOQ;
1329 m_useRDOQTS = bUseRDOQTS;
1330 #if ADAPTIVE_QP_SELECTION
1331 m_bUseAdaptQpSelect = bUseAdaptQpSelect;
1332 #endif
1333 m_useTransformSkipFast = useTransformSkipFast;
1334 }
1335
1336
transformNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,TCoeff * rpcCoeff,TCoeff * pcArlCoeff,TCoeff & uiAbsSum,const QpParam & cQP)1337 Void TComTrQuant::transformNxN( TComTU & rTu,
1338 const ComponentID compID,
1339 Pel * pcResidual,
1340 const UInt uiStride,
1341 TCoeff * rpcCoeff,
1342 #if ADAPTIVE_QP_SELECTION
1343 TCoeff * pcArlCoeff,
1344 #endif
1345 TCoeff & uiAbsSum,
1346 const QpParam & cQP
1347 )
1348 {
1349 const TComRectangle &rect = rTu.getRect(compID);
1350 const UInt uiWidth = rect.width;
1351 const UInt uiHeight = rect.height;
1352 TComDataCU* pcCU = rTu.getCU();
1353 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1354 const UInt uiOrgTrDepth = rTu.GetTransformDepthRel();
1355
1356 uiAbsSum=0;
1357
1358 RDPCMMode rdpcmMode = RDPCM_OFF;
1359 rdpcmNxN( rTu, compID, pcResidual, uiStride, cQP, rpcCoeff, uiAbsSum, rdpcmMode );
1360
1361 if (rdpcmMode == RDPCM_OFF)
1362 {
1363 uiAbsSum = 0;
1364 //transform and quantise
1365 if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1366 {
1367 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1368 const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1369
1370 for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1371 {
1372 for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1373 {
1374 const Pel currentSample = pcResidual[(y * uiStride) + x];
1375
1376 rpcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = currentSample;
1377 uiAbsSum += TCoeff(abs(currentSample));
1378 }
1379 }
1380 }
1381 else
1382 {
1383 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1384 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to transform\n";
1385 printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1386 #endif
1387
1388 assert( (pcCU->getSlice()->getSPS()->getMaxTrSize() >= uiWidth) );
1389
1390 if(pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0)
1391 {
1392 xTransformSkip( pcResidual, uiStride, m_plTempCoeff, rTu, compID );
1393 }
1394 else
1395 {
1396 xT( compID, rTu.useDST(compID), pcResidual, uiStride, m_plTempCoeff, uiWidth, uiHeight );
1397 }
1398
1399 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1400 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between transform and quantiser\n";
1401 printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1402 #endif
1403
1404 xQuant( rTu, m_plTempCoeff, rpcCoeff,
1405
1406 #if ADAPTIVE_QP_SELECTION
1407 pcArlCoeff,
1408 #endif
1409 uiAbsSum, compID, cQP );
1410
1411 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1412 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of quantiser\n";
1413 printBlock(rpcCoeff, uiWidth, uiHeight, uiWidth);
1414 #endif
1415 }
1416 }
1417
1418 //set the CBF
1419 pcCU->setCbfPartRange((((uiAbsSum > 0) ? 1 : 0) << uiOrgTrDepth), compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1420 }
1421
1422
invTransformNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,TCoeff * pcCoeff,const QpParam & cQP DEBUG_STRING_FN_DECLAREP (psDebug))1423 Void TComTrQuant::invTransformNxN( TComTU &rTu,
1424 const ComponentID compID,
1425 Pel *pcResidual,
1426 const UInt uiStride,
1427 TCoeff * pcCoeff,
1428 const QpParam &cQP
1429 DEBUG_STRING_FN_DECLAREP(psDebug))
1430 {
1431 TComDataCU* pcCU=rTu.getCU();
1432 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1433 const TComRectangle &rect = rTu.getRect(compID);
1434 const UInt uiWidth = rect.width;
1435 const UInt uiHeight = rect.height;
1436
1437 if (uiWidth != uiHeight) //for intra, the TU will have been split above this level, so this condition won't be true, hence this only affects inter
1438 {
1439 //------------------------------------------------
1440
1441 //recurse deeper
1442
1443 TComTURecurse subTURecurse(rTu, false, TComTU::VERTICAL_SPLIT, true, compID);
1444
1445 do
1446 {
1447 //------------------
1448
1449 const UInt lineOffset = subTURecurse.GetSectionNumber() * subTURecurse.getRect(compID).height;
1450
1451 Pel *subTUResidual = pcResidual + (lineOffset * uiStride);
1452 TCoeff *subTUCoefficients = pcCoeff + (lineOffset * subTURecurse.getRect(compID).width);
1453
1454 invTransformNxN(subTURecurse, compID, subTUResidual, uiStride, subTUCoefficients, cQP DEBUG_STRING_PASS_INTO(psDebug));
1455
1456 //------------------
1457
1458 }
1459 while (subTURecurse.nextSection(rTu));
1460
1461 //------------------------------------------------
1462
1463 return;
1464 }
1465
1466 #if defined DEBUG_STRING
1467 if (psDebug)
1468 {
1469 std::stringstream ss(stringstream::out);
1470 printBlockToStream(ss, (compID==0)?"###InvTran ip Ch0: " : ((compID==1)?"###InvTran ip Ch1: ":"###InvTran ip Ch2: "), pcCoeff, uiWidth, uiHeight, uiWidth);
1471 DEBUG_STRING_APPEND((*psDebug), ss.str())
1472 }
1473 #endif
1474
1475 if(pcCU->getCUTransquantBypass(uiAbsPartIdx))
1476 {
1477 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1478 const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1479
1480 for (UInt y = 0, coefficientIndex = 0; y<uiHeight; y++)
1481 {
1482 for (UInt x = 0; x<uiWidth; x++, coefficientIndex++)
1483 {
1484 pcResidual[(y * uiStride) + x] = Pel(pcCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex]);
1485 }
1486 }
1487 }
1488 else
1489 {
1490 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1491 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at input to dequantiser\n";
1492 printBlock(pcCoeff, uiWidth, uiHeight, uiWidth);
1493 #endif
1494
1495 xDeQuant(rTu, pcCoeff, m_plTempCoeff, compID, cQP);
1496
1497 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1498 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU between dequantiser and inverse-transform\n";
1499 printBlock(m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1500 #endif
1501
1502 #if defined DEBUG_STRING
1503 if (psDebug)
1504 {
1505 std::stringstream ss(stringstream::out);
1506 printBlockToStream(ss, "###InvTran deq: ", m_plTempCoeff, uiWidth, uiHeight, uiWidth);
1507 (*psDebug)+=ss.str();
1508 }
1509 #endif
1510
1511 if(pcCU->getTransformSkip(uiAbsPartIdx, compID))
1512 {
1513 xITransformSkip( m_plTempCoeff, pcResidual, uiStride, rTu, compID );
1514
1515 #if defined DEBUG_STRING
1516 if (psDebug)
1517 {
1518 std::stringstream ss(stringstream::out);
1519 printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1520 (*psDebug)+=ss.str();
1521 (*psDebug)+="(<- was a Transform-skipped block)\n";
1522 }
1523 #endif
1524 }
1525 else
1526 {
1527 xIT( compID, rTu.useDST(compID), m_plTempCoeff, pcResidual, uiStride, uiWidth, uiHeight );
1528
1529 #if defined DEBUG_STRING
1530 if (psDebug)
1531 {
1532 std::stringstream ss(stringstream::out);
1533 printBlockToStream(ss, "###InvTran resi: ", pcResidual, uiWidth, uiHeight, uiStride);
1534 (*psDebug)+=ss.str();
1535 (*psDebug)+="(<- was a Transformed block)\n";
1536 }
1537 #endif
1538 }
1539
1540 #ifdef DEBUG_TRANSFORM_AND_QUANTISE
1541 std::cout << g_debugCounter << ": " << uiWidth << "x" << uiHeight << " channel " << compID << " TU at output of inverse-transform\n";
1542 printBlock(pcResidual, uiWidth, uiHeight, uiStride);
1543 g_debugCounter++;
1544 #endif
1545 }
1546
1547 invRdpcmNxN( rTu, compID, pcResidual, uiStride );
1548 }
1549
invRecurTransformNxN(const ComponentID compID,TComYuv * pResidual,TComTU & rTu)1550 Void TComTrQuant::invRecurTransformNxN( const ComponentID compID,
1551 TComYuv *pResidual,
1552 TComTU &rTu)
1553 {
1554 if (!rTu.ProcessComponentSection(compID)) return;
1555
1556 TComDataCU* pcCU = rTu.getCU();
1557 UInt absPartIdxTU = rTu.GetAbsPartIdxTU();
1558 UInt uiTrMode=rTu.GetTransformDepthRel();
1559 if( (pcCU->getCbf(absPartIdxTU, compID, uiTrMode) == 0) && (isLuma(compID) || !pcCU->getSlice()->getPPS()->getUseCrossComponentPrediction()) )
1560 {
1561 return;
1562 }
1563
1564 if( uiTrMode == pcCU->getTransformIdx( absPartIdxTU ) )
1565 {
1566 const TComRectangle &tuRect = rTu.getRect(compID);
1567 const Int uiStride = pResidual->getStride( compID );
1568 Pel *rpcResidual = pResidual->getAddr( compID );
1569 UInt uiAddr = (tuRect.x0 + uiStride*tuRect.y0);
1570 Pel *pResi = rpcResidual + uiAddr;
1571 TCoeff *pcCoeff = pcCU->getCoeff(compID) + rTu.getCoefficientOffset(compID);
1572
1573 const QpParam cQP(*pcCU, compID);
1574
1575 if(pcCU->getCbf(absPartIdxTU, compID, uiTrMode) != 0)
1576 {
1577 DEBUG_STRING_NEW(sTemp)
1578 #ifdef DEBUG_STRING
1579 std::string *psDebug=((DebugOptionList::DebugString_InvTran.getInt()&(pcCU->isIntra(absPartIdxTU)?1:(pcCU->isInter(absPartIdxTU)?2:4)))!=0) ? &sTemp : 0;
1580 #endif
1581
1582 invTransformNxN( rTu, compID, pResi, uiStride, pcCoeff, cQP DEBUG_STRING_PASS_INTO(psDebug) );
1583
1584 #ifdef DEBUG_STRING
1585 if (psDebug != 0)
1586 std::cout << (*psDebug);
1587 #endif
1588 }
1589
1590 if (isChroma(compID) && (pcCU->getCrossComponentPredictionAlpha(absPartIdxTU, compID) != 0))
1591 {
1592 const Pel *piResiLuma = pResidual->getAddr( COMPONENT_Y );
1593 const Int strideLuma = pResidual->getStride( COMPONENT_Y );
1594 const Int tuWidth = rTu.getRect( compID ).width;
1595 const Int tuHeight = rTu.getRect( compID ).height;
1596
1597 if(pcCU->getCbf(absPartIdxTU, COMPONENT_Y, uiTrMode) != 0)
1598 {
1599 pResi = rpcResidual + uiAddr;
1600 const Pel *pResiLuma = piResiLuma + uiAddr;
1601
1602 crossComponentPrediction( rTu, compID, pResiLuma, pResi, pResi, tuWidth, tuHeight, strideLuma, uiStride, uiStride, true );
1603 }
1604 }
1605 }
1606 else
1607 {
1608 TComTURecurse tuRecurseChild(rTu, false);
1609 do
1610 {
1611 invRecurTransformNxN( compID, pResidual, tuRecurseChild );
1612 }
1613 while (tuRecurseChild.nextSection(rTu));
1614 }
1615 }
1616
applyForwardRDPCM(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,const QpParam & cQP,TCoeff * pcCoeff,TCoeff & uiAbsSum,const RDPCMMode mode)1617 Void TComTrQuant::applyForwardRDPCM( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, const RDPCMMode mode )
1618 {
1619 TComDataCU *pcCU=rTu.getCU();
1620 const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1621
1622 const Bool bLossless = pcCU->getCUTransquantBypass( uiAbsPartIdx );
1623 const UInt uiWidth = rTu.getRect(compID).width;
1624 const UInt uiHeight = rTu.getRect(compID).height;
1625 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(compID);
1626 const UInt uiSizeMinus1 = (uiWidth * uiHeight) - 1;
1627
1628 Pel reconstructedResi[MAX_TU_SIZE * MAX_TU_SIZE];
1629
1630 UInt uiX = 0;
1631 UInt uiY = 0;
1632
1633 UInt &majorAxis = (mode == RDPCM_HOR) ? uiX : uiY;
1634 UInt &minorAxis = (mode == RDPCM_HOR) ? uiY : uiX;
1635 const UInt majorAxisLimit = (mode == RDPCM_HOR) ? uiWidth : uiHeight;
1636 const UInt minorAxisLimit = (mode == RDPCM_HOR) ? uiHeight : uiWidth;
1637 const UInt referenceSampleOffset = (mode == RDPCM_HOR) ? 1 : uiWidth;
1638
1639 const Bool bUseHalfRoundingPoint = (mode != RDPCM_OFF);
1640
1641 uiAbsSum = 0;
1642
1643 for ( majorAxis = 0; majorAxis < majorAxisLimit; majorAxis++ )
1644 {
1645 for ( minorAxis = 0; minorAxis < minorAxisLimit; minorAxis++ )
1646 {
1647 const UInt sampleIndex = (uiY * uiWidth) + uiX;
1648 const UInt coefficientIndex = (rotateResidual ? (uiSizeMinus1-sampleIndex) : sampleIndex);
1649 const Pel currentSample = pcResidual[(uiY * uiStride) + uiX];
1650 const Pel referenceSample = ((mode != RDPCM_OFF) && (majorAxis > 0)) ? reconstructedResi[sampleIndex - referenceSampleOffset] : 0;
1651
1652 const Pel encoderSideDelta = currentSample - referenceSample;
1653
1654 Pel reconstructedDelta;
1655 if ( bLossless )
1656 {
1657 pcCoeff[coefficientIndex] = encoderSideDelta;
1658 reconstructedDelta = encoderSideDelta;
1659 }
1660 else
1661 {
1662 transformSkipQuantOneSample(rTu, compID, encoderSideDelta, pcCoeff, coefficientIndex, cQP, bUseHalfRoundingPoint);
1663 invTrSkipDeQuantOneSample (rTu, compID, pcCoeff[coefficientIndex], reconstructedDelta, cQP, coefficientIndex);
1664 }
1665
1666 uiAbsSum += abs(pcCoeff[coefficientIndex]);
1667
1668 reconstructedResi[sampleIndex] = reconstructedDelta + referenceSample;
1669 }
1670 }
1671 }
1672
rdpcmNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride,const QpParam & cQP,TCoeff * pcCoeff,TCoeff & uiAbsSum,RDPCMMode & rdpcmMode)1673 Void TComTrQuant::rdpcmNxN ( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride, const QpParam& cQP, TCoeff* pcCoeff, TCoeff &uiAbsSum, RDPCMMode& rdpcmMode )
1674 {
1675 TComDataCU *pcCU=rTu.getCU();
1676 const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1677
1678 if (!pcCU->isRDPCMEnabled(uiAbsPartIdx) || ((pcCU->getTransformSkip(uiAbsPartIdx, compID) == 0) && !pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1679 {
1680 rdpcmMode = RDPCM_OFF;
1681 }
1682 else if ( pcCU->isIntra( uiAbsPartIdx ) )
1683 {
1684 const ChromaFormat chFmt = pcCU->getPic()->getPicYuvOrg()->getChromaFormat();
1685 const ChannelType chType = toChannelType(compID);
1686 const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1687 const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1688 const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1689
1690 if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX)
1691 {
1692 rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1693 applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, uiAbsSum, rdpcmMode );
1694 }
1695 else rdpcmMode = RDPCM_OFF;
1696 }
1697 else // not intra, need to select the best mode
1698 {
1699 const UInt uiWidth = rTu.getRect(compID).width;
1700 const UInt uiHeight = rTu.getRect(compID).height;
1701
1702 RDPCMMode bestMode = NUMBER_OF_RDPCM_MODES;
1703 TCoeff bestAbsSum = std::numeric_limits<TCoeff>::max();
1704 TCoeff bestCoefficients[MAX_TU_SIZE * MAX_TU_SIZE];
1705
1706 for (UInt modeIndex = 0; modeIndex < NUMBER_OF_RDPCM_MODES; modeIndex++)
1707 {
1708 const RDPCMMode mode = RDPCMMode(modeIndex);
1709
1710 TCoeff currAbsSum = 0;
1711
1712 applyForwardRDPCM( rTu, compID, pcResidual, uiStride, cQP, pcCoeff, currAbsSum, mode );
1713
1714 if (currAbsSum < bestAbsSum)
1715 {
1716 bestMode = mode;
1717 bestAbsSum = currAbsSum;
1718 if (mode != RDPCM_OFF)
1719 {
1720 memcpy(bestCoefficients, pcCoeff, (uiWidth * uiHeight * sizeof(TCoeff)));
1721 }
1722 }
1723 }
1724
1725 rdpcmMode = bestMode;
1726 uiAbsSum = bestAbsSum;
1727
1728 if (rdpcmMode != RDPCM_OFF) //the TU is re-transformed and quantised if DPCM_OFF is returned, so there is no need to preserve it here
1729 {
1730 memcpy(pcCoeff, bestCoefficients, (uiWidth * uiHeight * sizeof(TCoeff)));
1731 }
1732 }
1733
1734 pcCU->setExplicitRdpcmModePartRange(rdpcmMode, compID, uiAbsPartIdx, rTu.GetAbsPartIdxNumParts(compID));
1735 }
1736
invRdpcmNxN(TComTU & rTu,const ComponentID compID,Pel * pcResidual,const UInt uiStride)1737 Void TComTrQuant::invRdpcmNxN( TComTU& rTu, const ComponentID compID, Pel* pcResidual, const UInt uiStride )
1738 {
1739 TComDataCU *pcCU=rTu.getCU();
1740 const UInt uiAbsPartIdx=rTu.GetAbsPartIdxTU();
1741
1742 if (pcCU->isRDPCMEnabled( uiAbsPartIdx ) && ((pcCU->getTransformSkip(uiAbsPartIdx, compID ) != 0) || pcCU->getCUTransquantBypass(uiAbsPartIdx)))
1743 {
1744 const UInt uiWidth = rTu.getRect(compID).width;
1745 const UInt uiHeight = rTu.getRect(compID).height;
1746
1747 RDPCMMode rdpcmMode = RDPCM_OFF;
1748
1749 if ( pcCU->isIntra( uiAbsPartIdx ) )
1750 {
1751 const ChromaFormat chFmt = pcCU->getPic()->getPicYuvRec()->getChromaFormat();
1752 const ChannelType chType = toChannelType(compID);
1753 const UInt uiChPredMode = pcCU->getIntraDir( chType, uiAbsPartIdx );
1754 const UInt uiChCodedMode = (uiChPredMode==DM_CHROMA_IDX && isChroma(compID)) ? pcCU->getIntraDir(CHANNEL_TYPE_LUMA, getChromasCorrespondingPULumaIdx(uiAbsPartIdx, chFmt)) : uiChPredMode;
1755 const UInt uiChFinalMode = ((chFmt == CHROMA_422) && isChroma(compID)) ? g_chroma422IntraAngleMappingTable[uiChCodedMode] : uiChCodedMode;
1756
1757 if (uiChFinalMode == VER_IDX || uiChFinalMode == HOR_IDX)
1758 {
1759 rdpcmMode = (uiChFinalMode == VER_IDX) ? RDPCM_VER : RDPCM_HOR;
1760 }
1761 }
1762 else // not intra case
1763 {
1764 rdpcmMode = RDPCMMode(pcCU->getExplicitRdpcmMode( compID, uiAbsPartIdx ));
1765 }
1766
1767 if (rdpcmMode == RDPCM_VER)
1768 {
1769 pcResidual += uiStride; //start from row 1
1770
1771 for( UInt uiY = 1; uiY < uiHeight; uiY++ )
1772 {
1773 for( UInt uiX = 0; uiX < uiWidth; uiX++ )
1774 {
1775 pcResidual[ uiX ] = pcResidual[ uiX ] + pcResidual [ (Int)uiX - (Int)uiStride ];
1776 }
1777 pcResidual += uiStride;
1778 }
1779 }
1780 else if (rdpcmMode == RDPCM_HOR)
1781 {
1782 for( UInt uiY = 0; uiY < uiHeight; uiY++ )
1783 {
1784 for( UInt uiX = 1; uiX < uiWidth; uiX++ )
1785 {
1786 pcResidual[ uiX ] = pcResidual[ uiX ] + pcResidual [ (Int)uiX-1 ];
1787 }
1788 pcResidual += uiStride;
1789 }
1790 }
1791 }
1792 }
1793
1794 // ------------------------------------------------------------------------------------------------
1795 // Logical transform
1796 // ------------------------------------------------------------------------------------------------
1797
1798 /** Wrapper function between HM interface and core NxN forward transform (2D)
1799 * \param piBlkResi input data (residual)
1800 * \param psCoeff output data (transform coefficients)
1801 * \param uiStride stride of input residual data
1802 * \param iSize transform size (iSize x iSize)
1803 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1804 */
xT(const ComponentID compID,Bool useDST,Pel * piBlkResi,UInt uiStride,TCoeff * psCoeff,Int iWidth,Int iHeight)1805 Void TComTrQuant::xT( const ComponentID compID, Bool useDST, Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, Int iWidth, Int iHeight )
1806 {
1807 #if MATRIX_MULT
1808 if( iWidth == iHeight)
1809 {
1810 xTr(g_bitDepth[toChannelType(compID)], piBlkResi, psCoeff, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1811 return;
1812 }
1813 #endif
1814
1815 TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1816 TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1817
1818 for (Int y = 0; y < iHeight; y++)
1819 for (Int x = 0; x < iWidth; x++)
1820 {
1821 block[(y * iWidth) + x] = piBlkResi[(y * uiStride) + x];
1822 }
1823
1824 xTrMxN( g_bitDepth[toChannelType(compID)], block, coeff, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1825
1826 memcpy(psCoeff, coeff, (iWidth * iHeight * sizeof(TCoeff)));
1827 }
1828
1829 /** Wrapper function between HM interface and core NxN inverse transform (2D)
1830 * \param plCoef input data (transform coefficients)
1831 * \param pResidual output data (residual)
1832 * \param uiStride stride of input residual data
1833 * \param iSize transform size (iSize x iSize)
1834 * \param uiMode is Intra Prediction mode used in Mode-Dependent DCT/DST only
1835 */
xIT(const ComponentID compID,Bool useDST,TCoeff * plCoef,Pel * pResidual,UInt uiStride,Int iWidth,Int iHeight)1836 Void TComTrQuant::xIT( const ComponentID compID, Bool useDST, TCoeff* plCoef, Pel* pResidual, UInt uiStride, Int iWidth, Int iHeight )
1837 {
1838 #if MATRIX_MULT
1839 if( iWidth == iHeight )
1840 {
1841 #if O0043_BEST_EFFORT_DECODING
1842 xITr(g_bitDepthInStream[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1843 #else
1844 xITr(g_bitDepth[toChannelType(compID)], plCoef, pResidual, uiStride, (UInt)iWidth, useDST, g_maxTrDynamicRange[toChannelType(compID)]);
1845 #endif
1846 return;
1847 }
1848 #endif
1849
1850 TCoeff block[ MAX_TU_SIZE * MAX_TU_SIZE ];
1851 TCoeff coeff[ MAX_TU_SIZE * MAX_TU_SIZE ];
1852
1853 memcpy(coeff, plCoef, (iWidth * iHeight * sizeof(TCoeff)));
1854
1855 #if O0043_BEST_EFFORT_DECODING
1856 xITrMxN( g_bitDepthInStream[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1857 #else
1858 xITrMxN( g_bitDepth[toChannelType(compID)], coeff, block, iWidth, iHeight, useDST, g_maxTrDynamicRange[toChannelType(compID)] );
1859 #endif
1860
1861 for (Int y = 0; y < iHeight; y++)
1862 for (Int x = 0; x < iWidth; x++)
1863 {
1864 pResidual[(y * uiStride) + x] = Pel(block[(y * iWidth) + x]);
1865 }
1866 }
1867
1868 /** Wrapper function between HM interface and core 4x4 transform skipping
1869 * \param piBlkResi input data (residual)
1870 * \param psCoeff output data (transform coefficients)
1871 * \param uiStride stride of input residual data
1872 * \param iSize transform size (iSize x iSize)
1873 */
xTransformSkip(Pel * piBlkResi,UInt uiStride,TCoeff * psCoeff,TComTU & rTu,const ComponentID component)1874 Void TComTrQuant::xTransformSkip( Pel* piBlkResi, UInt uiStride, TCoeff* psCoeff, TComTU &rTu, const ComponentID component )
1875 {
1876 const TComRectangle &rect = rTu.getRect(component);
1877 const Int width = rect.width;
1878 const Int height = rect.height;
1879
1880 Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1881 if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1882 {
1883 iTransformShift = std::max<Int>(0, iTransformShift);
1884 }
1885
1886 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1887 const UInt uiSizeMinus1 = (width * height) - 1;
1888
1889 if (iTransformShift >= 0)
1890 {
1891 for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1892 {
1893 for (UInt x = 0; x < width; x++, coefficientIndex++)
1894 {
1895 psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = TCoeff(piBlkResi[(y * uiStride) + x]) << iTransformShift;
1896 }
1897 }
1898 }
1899 else //for very high bit depths
1900 {
1901 iTransformShift = -iTransformShift;
1902 const TCoeff offset = 1 << (iTransformShift - 1);
1903
1904 for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1905 {
1906 for (UInt x = 0; x < width; x++, coefficientIndex++)
1907 {
1908 psCoeff[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] = (TCoeff(piBlkResi[(y * uiStride) + x]) + offset) >> iTransformShift;
1909 }
1910 }
1911 }
1912 }
1913
1914 /** Wrapper function between HM interface and core NxN transform skipping
1915 * \param plCoef input data (coefficients)
1916 * \param pResidual output data (residual)
1917 * \param uiStride stride of input residual data
1918 * \param iSize transform size (iSize x iSize)
1919 */
xITransformSkip(TCoeff * plCoef,Pel * pResidual,UInt uiStride,TComTU & rTu,const ComponentID component)1920 Void TComTrQuant::xITransformSkip( TCoeff* plCoef, Pel* pResidual, UInt uiStride, TComTU &rTu, const ComponentID component )
1921 {
1922 const TComRectangle &rect = rTu.getRect(component);
1923 const Int width = rect.width;
1924 const Int height = rect.height;
1925
1926 Int iTransformShift = getTransformShift(toChannelType(component), rTu.GetEquivalentLog2TrSize(component));
1927 if (rTu.getCU()->getSlice()->getSPS()->getUseExtendedPrecision())
1928 {
1929 iTransformShift = std::max<Int>(0, iTransformShift);
1930 }
1931
1932 const Bool rotateResidual = rTu.isNonTransformedResidualRotated(component);
1933 const UInt uiSizeMinus1 = (width * height) - 1;
1934
1935 if (iTransformShift >= 0)
1936 {
1937 const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
1938
1939 for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1940 {
1941 for (UInt x = 0; x < width; x++, coefficientIndex++)
1942 {
1943 pResidual[(y * uiStride) + x] = Pel((plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] + offset) >> iTransformShift);
1944 }
1945 }
1946 }
1947 else //for very high bit depths
1948 {
1949 iTransformShift = -iTransformShift;
1950
1951 for (UInt y = 0, coefficientIndex = 0; y < height; y++)
1952 {
1953 for (UInt x = 0; x < width; x++, coefficientIndex++)
1954 {
1955 pResidual[(y * uiStride) + x] = Pel(plCoef[rotateResidual ? (uiSizeMinus1 - coefficientIndex) : coefficientIndex] << iTransformShift);
1956 }
1957 }
1958 }
1959 }
1960
1961 /** RDOQ with CABAC
 * Rate distortion optimized quantization for entropy coding engines using probability models like CABAC.
 * The block is processed in three stages: (1) a reverse-scan pass that picks a level for every
 * coefficient and decides, per coefficient group, whether the group is kept or zeroed; (2) a search
 * for the cheapest "last significant coefficient" position; (3) an optional sign-hiding adjustment
 * when the PPS sign-hide flag is set.
 * \param rTu           transform unit; supplies the block rectangle, the CU and the absolute partition index
 * \param plSrcCoeff    pointer to input buffer (transform coefficients)
 * \param piDstCoeff    pointer to output buffer (quantised levels; signs are copied from plSrcCoeff)
 * \param piArlDstCoeff only present when ADAPTIVE_QP_SELECTION is set: zeroed on entry and filled when m_bUseAdaptQpSelect is true
 * \param uiAbsSum      reference to absolute sum of quantized transform coefficient; levels are added to it,
 *                      it is not reset here and is left untouched when no non-zero level is found
 * \param compID        colour component being quantised
 * \param cQP           quantisation parameter (its per and rem members are used)
 * \returns Void
 */
xRateDistOptQuant(TComTU & rTu,TCoeff * plSrcCoeff,TCoeff * piDstCoeff,TCoeff * piArlDstCoeff,TCoeff & uiAbsSum,const ComponentID compID,const QpParam & cQP)1974 Void TComTrQuant::xRateDistOptQuant ( TComTU &rTu,
1975 TCoeff * plSrcCoeff,
1976 TCoeff * piDstCoeff,
1977 #if ADAPTIVE_QP_SELECTION
1978 TCoeff * piArlDstCoeff,
1979 #endif
1980 TCoeff &uiAbsSum,
1981 const ComponentID compID,
1982 const QpParam &cQP )
1983 {
1984 const TComRectangle & rect = rTu.getRect(compID);
1985 const UInt uiWidth = rect.width;
1986 const UInt uiHeight = rect.height;
1987 TComDataCU * pcCU = rTu.getCU();
1988 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
1989 const ChannelType channelType = toChannelType(compID);
1990 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
1991
1992 const Bool extendedPrecision = pcCU->getSlice()->getSPS()->getUseExtendedPrecision();
1993
1994 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
1995 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
1996 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
1997 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
1998 */
1999
2000 // Represents scaling through forward transform
2001 Int iTransformShift = getTransformShift(channelType, uiLog2TrSize);
 // for transform-skipped TUs with extended precision the shift is clamped so it is never negative
2002 if ((pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0) && pcCU->getSlice()->getSPS()->getUseExtendedPrecision())
2003 {
2004 iTransformShift = std::max<Int>(0, iTransformShift);
2005 }
2006
2007 const Bool bUseGolombRiceParameterAdaptation = pcCU->getSlice()->getSPS()->getUseGolombRiceParameterAdaptation();
2008 const UInt initialGolombRiceParameter = m_pcEstBitsSbac->golombRiceAdaptationStatistics[rTu.getGolombRiceStatisticsIndex(compID)] / RExt__GOLOMB_RICE_INCREMENT_DIVISOR;
2009 UInt uiGoRiceParam = initialGolombRiceParameter;
 // accumulated cost of quantising every coefficient of the block to zero
2010 Double d64BlockUncodedCost = 0;
2011 const UInt uiLog2BlockWidth = g_aucConvertToBit[ uiWidth ] + 2;
2012 const UInt uiLog2BlockHeight = g_aucConvertToBit[ uiHeight ] + 2;
2013 const UInt uiMaxNumCoeff = uiWidth * uiHeight;
2014 assert(compID<MAX_NUM_COMPONENT);
2015
2016 Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
2017 assert(scalingListType < SCALING_LIST_NUM);
2018
2019 #if ADAPTIVE_QP_SELECTION
2020 memset(piArlDstCoeff, 0, sizeof(TCoeff) * uiMaxNumCoeff);
2021 #endif
2022
 // Per-coefficient work buffers. The pdCost* arrays are indexed by scan position; rateIncUp,
 // rateIncDown, sigRateDelta and deltaU are indexed by block (raster) position.
 // Note that pdCostCoeff0 is not cleared here: every entry is written in the first pass below.
2023 Double pdCostCoeff [ MAX_TU_SIZE * MAX_TU_SIZE ];
2024 Double pdCostSig [ MAX_TU_SIZE * MAX_TU_SIZE ];
2025 Double pdCostCoeff0[ MAX_TU_SIZE * MAX_TU_SIZE ];
2026 memset( pdCostCoeff, 0, sizeof(Double) * uiMaxNumCoeff );
2027 memset( pdCostSig, 0, sizeof(Double) * uiMaxNumCoeff );
2028 Int rateIncUp [ MAX_TU_SIZE * MAX_TU_SIZE ];
2029 Int rateIncDown [ MAX_TU_SIZE * MAX_TU_SIZE ];
2030 Int sigRateDelta[ MAX_TU_SIZE * MAX_TU_SIZE ];
2031 TCoeff deltaU [ MAX_TU_SIZE * MAX_TU_SIZE ];
2032 memset( rateIncUp, 0, sizeof(Int ) * uiMaxNumCoeff );
2033 memset( rateIncDown, 0, sizeof(Int ) * uiMaxNumCoeff );
2034 memset( sigRateDelta, 0, sizeof(Int ) * uiMaxNumCoeff );
2035 memset( deltaU, 0, sizeof(TCoeff) * uiMaxNumCoeff );
2036
2037 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift; // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits
2038 const Double *const pdErrScale = getErrScaleCoeff(scalingListType, (uiLog2TrSize-2), cQP.rem);
2039 const Int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem, (uiLog2TrSize-2));
2040
 // when scaling lists are not in use, a single quantisation coefficient and error scale serve the whole block
2041 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, (pcCU->getTransformSkip(uiAbsPartIdx, compID) != 0));
2042 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
2043 const Double defaultErrorScale = getErrScaleCoeffNoScalingList(scalingListType, (uiLog2TrSize-2), cQP.rem);
2044
 // range of levels derived from the maximum transform dynamic range of this channel type
2045 const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
2046 const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
2047
2048 #if ADAPTIVE_QP_SELECTION
2049 Int iQBitsC = iQBits - ARL_C_PRECISION;
2050 Int iAddC = 1 << (iQBitsC-1);
2051 #endif
2052
2053 TUEntropyCodingParameters codingParameters;
2054 getTUEntropyCodingParameters(codingParameters, rTu, compID);
 // number of coefficients in one coefficient group
2055 const UInt uiCGSize = (1 << MLS_CG_SIZE);
2056
2057 Double pdCostCoeffGroupSig[ MLS_GRP_NUM ];
2058 UInt uiSigCoeffGroupFlag[ MLS_GRP_NUM ];
 // scan index of the coefficient group holding the last significant coefficient (-1: none found yet)
2059 Int iCGLastScanPos = -1;
2060
2061 UInt uiCtxSet = 0;
2062 Int c1 = 1;
2063 Int c2 = 0;
2064 Double d64BaseCost = 0;
 // scan position of the last significant coefficient (-1: none found yet)
2065 Int iLastScanPos = -1;
2066
2067 UInt c1Idx = 0;
2068 UInt c2Idx = 0;
2069 Int baseLevel;
2070
2071 memset( pdCostCoeffGroupSig, 0, sizeof(Double) * MLS_GRP_NUM );
2072 memset( uiSigCoeffGroupFlag, 0, sizeof(UInt) * MLS_GRP_NUM );
2073
2074 UInt uiCGNum = uiWidth * uiHeight >> MLS_CG_SIZE;
2075 Int iScanPos;
2076 coeffGroupRDStats rdStats;
2077
2078 const UInt significanceMapContextOffset = getSignificanceMapContextOffset(compID);
2079
 //===== first pass: coefficient groups, and the coefficients inside each group, in reverse scan order =====
2080 for (Int iCGScanPos = uiCGNum-1; iCGScanPos >= 0; iCGScanPos--)
2081 {
2082 UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2083 UInt uiCGPosY = uiCGBlkPos / codingParameters.widthInGroups;
2084 UInt uiCGPosX = uiCGBlkPos - (uiCGPosY * codingParameters.widthInGroups);
2085
 // the statistics are gathered afresh for every coefficient group
2086 memset( &rdStats, 0, sizeof (coeffGroupRDStats));
2087
2088 const Int patternSigCtx = TComTrQuant::calcPatternSigCtx(uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups);
2089
2090 for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2091 {
2092 iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2093 //===== quantization =====
2094 UInt uiBlkPos = codingParameters.scan[iScanPos];
2095 // set coeff
2096
2097 const Int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient;
2098 const Double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale;
2099
2100 const Int64 tmpLevel = Int64(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
2101
 // clamped so that adding the rounding offset (1 << (iQBits - 1)) below cannot exceed MAX_INTERMEDIATE_INT
2102 const Intermediate_Int lLevelDouble = (Intermediate_Int)min<Int64>(tmpLevel, MAX_INTERMEDIATE_INT - (Intermediate_Int(1) << (iQBits - 1)));
2103
2104 #if ADAPTIVE_QP_SELECTION
2105 if( m_bUseAdaptQpSelect )
2106 {
2107 piArlDstCoeff[uiBlkPos] = (TCoeff)(( lLevelDouble + iAddC) >> iQBitsC );
2108 }
2109 #endif
 // rounded quantised magnitude, limited to entropyCodingMaximum
2110 const UInt uiMaxAbsLevel = std::min<UInt>(UInt(entropyCodingMaximum), UInt((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
2111
 // cost of coding this coefficient as zero
2112 const Double dErr = Double( lLevelDouble );
2113 pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale;
2114 d64BlockUncodedCost += pdCostCoeff0[ iScanPos ];
2115 piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel;
2116
 // the first non-zero level met in reverse scan order becomes the initial last significant position
2117 if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
2118 {
2119 iLastScanPos = iScanPos;
2120 uiCtxSet = getContextSetIndex(compID, (iScanPos >> MLS_CG_SIZE), 0);
2121 iCGLastScanPos = iCGScanPos;
2122 }
2123
2124 if ( iLastScanPos >= 0 )
2125 {
2126 //===== coefficient level estimation =====
2127 UInt uiLevel;
2128 UInt uiOneCtx = (NUM_ONE_FLAG_CTX_PER_SET * uiCtxSet) + c1;
2129 UInt uiAbsCtx = (NUM_ABS_FLAG_CTX_PER_SET * uiCtxSet) + c2;
2130
2131 if( iScanPos == iLastScanPos )
2132 {
2133 uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2134 lLevelDouble, uiMaxAbsLevel, significanceMapContextOffset, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2135 c1Idx, c2Idx, iQBits, errorScale, 1, extendedPrecision, channelType
2136 );
2137 }
2138 else
2139 {
2140 UShort uiCtxSig = significanceMapContextOffset + getSigCtxInc( patternSigCtx, codingParameters, iScanPos, uiLog2BlockWidth, uiLog2BlockHeight, channelType );
2141
2142 uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
2143 lLevelDouble, uiMaxAbsLevel, uiCtxSig, uiOneCtx, uiAbsCtx, uiGoRiceParam,
2144 c1Idx, c2Idx, iQBits, errorScale, 0, extendedPrecision, channelType
2145 );
2146
 // rate difference between coding this position as significant and as not significant
2147 sigRateDelta[ uiBlkPos ] = m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 1 ] - m_pcEstBitsSbac->significantBits[ uiCtxSig ][ 0 ];
2148 }
2149
 // difference between the scaled coefficient and the chosen level, kept with 8 fractional bits
2150 deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
2151
 // rate change for moving the level up or down by one; read by the sign-hiding stage at the end
2152 if( uiLevel > 0 )
2153 {
2154 Int rateNow = xGetICRate( uiLevel, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType );
2155 rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2156 rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, uiOneCtx, uiAbsCtx, uiGoRiceParam, c1Idx, c2Idx, extendedPrecision, channelType ) - rateNow;
2157 }
2158 else // uiLevel == 0
2159 {
2160 rateIncUp [ uiBlkPos ] = m_pcEstBitsSbac->m_greaterOneBits[ uiOneCtx ][ 0 ];
2161 }
2162 piDstCoeff[ uiBlkPos ] = uiLevel;
2163 d64BaseCost += pdCostCoeff [ iScanPos ];
2164
 //===== Golomb-Rice parameter update =====
2165 baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2166 if( uiLevel >= baseLevel )
2167 {
2168 if (uiLevel > 3*(1<<uiGoRiceParam))
2169 {
 // without parameter adaptation the Rice parameter is capped at 4
2170 uiGoRiceParam = bUseGolombRiceParameterAdaptation ? (uiGoRiceParam + 1) : (std::min<UInt>((uiGoRiceParam + 1), 4));
2171 }
2172 }
2173 if ( uiLevel >= 1)
2174 {
2175 c1Idx ++;
2176 }
2177
2178 //===== update bin model =====
2179 if( uiLevel > 1 )
2180 {
2181 c1 = 0;
2182 c2 += (c2 < 2);
2183 c2Idx ++;
2184 }
2185 else if( (c1 < 3) && (c1 > 0) && uiLevel)
2186 {
2187 c1++;
2188 }
2189
2190 //===== context set update =====
2191 if( ( iScanPos % uiCGSize == 0 ) && ( iScanPos > 0 ) )
2192 {
2193 uiCtxSet = getContextSetIndex(compID, ((iScanPos - 1) >> MLS_CG_SIZE), (c1 == 0)); //(iScanPos - 1) because we do this **before** entering the final group
2194 c1 = 1;
2195 c2 = 0;
2196 c1Idx = 0;
2197 c2Idx = 0;
2198 uiGoRiceParam = initialGolombRiceParameter;
2199 }
2200 }
2201 else
2202 {
 // no significant coefficient found so far: only the coded-as-zero cost is accumulated
2203 d64BaseCost += pdCostCoeff0[ iScanPos ];
2204 }
 //===== per-group statistics used by the group decision below =====
2205 rdStats.d64SigCost += pdCostSig[ iScanPos ];
2206 if (iScanPosinCG == 0 )
2207 {
2208 rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
2209 }
2210 if (piDstCoeff[ uiBlkPos ] )
2211 {
2212 uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2213 rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
2214 rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
2215 if ( iScanPosinCG != 0 )
2216 {
2217 rdStats.iNNZbeforePos0++;
2218 }
2219 }
2220 } //end for (iScanPosinCG)
2221
 //===== coefficient group decision (only once a last significant position exists) =====
2222 if (iCGLastScanPos >= 0)
2223 {
2224 if( iCGScanPos )
2225 {
2226 if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2227 {
 // all-zero group: replace the per-coefficient significance cost by the cost of a zero group flag
2228 UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2229 d64BaseCost += xGetRateSigCoeffGroup(0, uiCtxSig) - rdStats.d64SigCost;;
2230 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2231 }
2232 else
2233 {
2234 if (iCGScanPos < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
2235 {
 // no non-zero level at positions other than position 0: the significance cost of position 0 is removed
2236 if ( rdStats.iNNZbeforePos0 == 0 )
2237 {
2238 d64BaseCost -= rdStats.d64SigCost_0;
2239 rdStats.d64SigCost -= rdStats.d64SigCost_0;
2240 }
2241 // rd-cost if SigCoeffGroupFlag = 0, initialization
2242 Double d64CostZeroCG = d64BaseCost;
2243
2244 // add SigCoeffGroupFlag cost to total cost
2245 UInt uiCtxSig = getSigCoeffGroupCtxInc( uiSigCoeffGroupFlag, uiCGPosX, uiCGPosY, codingParameters.widthInGroups, codingParameters.heightInGroups );
2246
2247 if (iCGScanPos < iCGLastScanPos)
2248 {
2249 d64BaseCost += xGetRateSigCoeffGroup(1, uiCtxSig);
2250 d64CostZeroCG += xGetRateSigCoeffGroup(0, uiCtxSig);
2251 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(1, uiCtxSig);
2252 }
2253
2254 // try to convert the current coeff group from non-zero to all-zero
2255 d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels
2256 d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels
2257 d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zero levels
2258
2259 // if we can save cost, change this block to all-zero block
2260 if ( d64CostZeroCG < d64BaseCost )
2261 {
2262 uiSigCoeffGroupFlag[ uiCGBlkPos ] = 0;
2263 d64BaseCost = d64CostZeroCG;
2264 if (iCGScanPos < iCGLastScanPos)
2265 {
2266 pdCostCoeffGroupSig[ iCGScanPos ] = xGetRateSigCoeffGroup(0, uiCtxSig);
2267 }
2268 // reset coeffs to 0 in this block
2269 for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2270 {
2271 iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2272 UInt uiBlkPos = codingParameters.scan[ iScanPos ];
2273
2274 if (piDstCoeff[ uiBlkPos ])
2275 {
2276 piDstCoeff [ uiBlkPos ] = 0;
2277 pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
2278 pdCostSig [ iScanPos ] = 0;
2279 }
2280 }
2281 } // end if ( d64CostZeroCG < d64BaseCost )
2282 }
2283 } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
2284 }
2285 else
2286 {
 // coefficient group at scan position 0: its flag is always set
2287 uiSigCoeffGroupFlag[ uiCGBlkPos ] = 1;
2288 }
2289 }
2290 } //end for (iCGScanPos)
2291
2292 //===== estimate last position =====
 // every level is zero: nothing more to do (uiAbsSum is left as passed in)
2293 if ( iLastScanPos < 0 )
2294 {
2295 return;
2296 }
2297
 // d64BestCost starts as the cost of coding the whole block as zero (uncoded cost plus the bits of a
 // zero coded-block flag); d64BaseCost receives the bits of a set coded-block flag
2298 Double d64BestCost = 0;
2299 Int ui16CtxCbf = 0;
2300 Int iBestLastIdxP1 = 0;
2301 if( !pcCU->isIntra( uiAbsPartIdx ) && isLuma(compID) && pcCU->getTransformIdx( uiAbsPartIdx ) == 0 )
2302 {
2303 ui16CtxCbf = 0;
2304 d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 0 ] );
2305 d64BaseCost += xGetICost( m_pcEstBitsSbac->blockRootCbpBits[ ui16CtxCbf ][ 1 ] );
2306 }
2307 else
2308 {
2309 ui16CtxCbf = pcCU->getCtxQtCbf( rTu, channelType );
2310 ui16CtxCbf += getCBFContextOffset(compID);
2311 d64BestCost = d64BlockUncodedCost + xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 0 ] );
2312 d64BaseCost += xGetICost( m_pcEstBitsSbac->blockCbpBits[ ui16CtxCbf ][ 1 ] );
2313 }
2314
2315
 // Walk back from the initial last position; each non-zero level is tried as the new last
 // significant coefficient and the cheapest candidate is kept in iBestLastIdxP1 (scan position + 1,
 // 0 meaning that the all-zero block is cheapest).
2316 Bool bFoundLast = false;
2317 for (Int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
2318 {
2319 UInt uiCGBlkPos = codingParameters.scanCG[ iCGScanPos ];
2320
2321 d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
2322 if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2323 {
2324 for (Int iScanPosinCG = uiCGSize-1; iScanPosinCG >= 0; iScanPosinCG--)
2325 {
2326 iScanPos = iCGScanPos*uiCGSize + iScanPosinCG;
2327
2328 if (iScanPos > iLastScanPos) continue;
2329 UInt uiBlkPos = codingParameters.scan[iScanPos];
2330
2331 if( piDstCoeff[ uiBlkPos ] )
2332 {
2333 UInt uiPosY = uiBlkPos >> uiLog2BlockWidth;
2334 UInt uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
2335
 // for the vertical scan the two coordinates are passed to xGetRateLast in swapped order
2336 Double d64CostLast= codingParameters.scanType == SCAN_VER ? xGetRateLast( uiPosY, uiPosX, compID ) : xGetRateLast( uiPosX, uiPosY, compID );
2337 Double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
2338
2339 if( totalCost < d64BestCost )
2340 {
2341 iBestLastIdxP1 = iScanPos + 1;
2342 d64BestCost = totalCost;
2343 }
 // a level greater than 1 ends the search
2344 if( piDstCoeff[ uiBlkPos ] > 1 )
2345 {
2346 bFoundLast = true;
2347 break;
2348 }
 // otherwise account for dropping this coefficient before trying earlier positions
2349 d64BaseCost -= pdCostCoeff[ iScanPos ];
2350 d64BaseCost += pdCostCoeff0[ iScanPos ];
2351 }
2352 else
2353 {
2354 d64BaseCost -= pdCostSig[ iScanPos ];
2355 }
2356 } //end for
2357 if (bFoundLast)
2358 {
2359 break;
2360 }
2361 } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
2362 } // end for
2363
2364
 //===== restore signs and accumulate the absolute sum of the kept coefficients =====
2365 for ( Int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
2366 {
2367 Int blkPos = codingParameters.scan[ scanPos ];
2368 TCoeff level = piDstCoeff[ blkPos ];
2369 uiAbsSum += level;
2370 piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
2371 }
2372
2373 //===== clean uncoded coefficients =====
2374 for ( Int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
2375 {
2376 piDstCoeff[ codingParameters.scan[ scanPos ] ] = 0;
2377 }
2378
2379
 //===== sign hiding: make the parity of each eligible group's absolute sum match its first sign =====
2380 if( pcCU->getSlice()->getPPS()->getSignHideFlag() && uiAbsSum>=2)
2381 {
2382 const Double inverseQuantScale = Double(g_invQuantScales[cQP.rem]);
2383 Int64 rdFactor = (Int64)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per))
2384 / m_dLambda / 16 / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(g_bitDepth[channelType] - 8)))
2385 + 0.5);
2386
 // lastCG: -1 until a group with a non-zero level is met, 1 while processing that group, 0 afterwards
2387 Int lastCG = -1;
2388 Int absSum = 0 ;
2389 Int n ;
2390
2391 for( Int subSet = (uiWidth*uiHeight-1) >> MLS_CG_SIZE; subSet >= 0; subSet-- )
2392 {
2393 Int subPos = subSet << MLS_CG_SIZE;
2394 Int firstNZPosInCG=uiCGSize , lastNZPosInCG=-1 ;
2395 absSum = 0 ;
2396
 // locate the last and the first non-zero level of the group (in scan order)
2397 for(n = uiCGSize-1; n >= 0; --n )
2398 {
2399 if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2400 {
2401 lastNZPosInCG = n;
2402 break;
2403 }
2404 }
2405
2406 for(n = 0; n <uiCGSize; n++ )
2407 {
2408 if( piDstCoeff[ codingParameters.scan[ n + subPos ]] )
2409 {
2410 firstNZPosInCG = n;
2411 break;
2412 }
2413 }
2414
2415 for(n = firstNZPosInCG; n <=lastNZPosInCG; n++ )
2416 {
2417 absSum += Int(piDstCoeff[ codingParameters.scan[ n + subPos ]]);
2418 }
2419
2420 if(lastNZPosInCG>=0 && lastCG==-1)
2421 {
2422 lastCG = 1;
2423 }
2424
 // the group is only adjusted when its first and last non-zero levels are at least SBH_THRESHOLD apart
2425 if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
2426 {
2427 UInt signbit = (piDstCoeff[codingParameters.scan[subPos+firstNZPosInCG]]>0?0:1);
2428 if( signbit!=(absSum&0x1) ) // hide but need tune
2429 {
2430 // calculate the cost
2431 Int64 minCostInc = MAX_INT64, curCost = MAX_INT64;
2432 Int minPos = -1, finalChange = 0, curChange = 0;
2433
 // find the position where changing the level by +1 or -1 costs the least
2434 for( n = (lastCG==1?lastNZPosInCG:uiCGSize-1) ; n >= 0; --n )
2435 {
2436 UInt uiBlkPos = codingParameters.scan[ n + subPos ];
2437 if(piDstCoeff[ uiBlkPos ] != 0 )
2438 {
2439 Int64 costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
2440 Int64 costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
2441 - ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
2442
2443 if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
2444 {
2445 costDown -= (4<<15);
2446 }
2447
2448 if(costUp<costDown)
2449 {
2450 curCost = costUp;
2451 curChange = 1;
2452 }
2453 else
2454 {
2455 curChange = -1;
 // the first non-zero level of the group is never reduced from magnitude 1 to 0
2456 if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
2457 {
2458 curCost = MAX_INT64;
2459 }
2460 else
2461 {
2462 curCost = costDown;
2463 }
2464 }
2465 }
2466 else
2467 {
 // zero level: the only possible change is raising it to magnitude 1
2468 curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<15) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
2469 curChange = 1 ;
2470
 // a zero before the first non-zero level is only eligible when its source sign equals signbit
2471 if(n<firstNZPosInCG)
2472 {
2473 UInt thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
2474 if(thissignbit != signbit )
2475 {
2476 curCost = MAX_INT64;
2477 }
2478 }
2479 }
2480
2481 if( curCost<minCostInc)
2482 {
2483 minCostInc = curCost;
2484 finalChange = curChange;
2485 minPos = uiBlkPos;
2486 }
2487 }
2488
 // a level already at either end of the entropy coding range has its magnitude reduced instead
2489 if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
2490 {
2491 finalChange = -1;
2492 }
2493
 // finalChange applies to the magnitude; the direction follows the sign of the source coefficient
2494 if(plSrcCoeff[minPos]>=0)
2495 {
2496 piDstCoeff[minPos] += finalChange ;
2497 }
2498 else
2499 {
2500 piDstCoeff[minPos] -= finalChange ;
2501 }
2502 }
2503 }
2504
2505 if(lastCG==1)
2506 {
2507 lastCG=0 ;
2508 }
2509 }
2510 }
2511 }
2512
2513
2514 /** Pattern decision for context derivation process of significant_coeff_flag
2515 * \param sigCoeffGroupFlag pointer to prior coded significant coeff group
2516 * \param uiCGPosX column of current coefficient group
2517 * \param uiCGPosY row of current coefficient group
2518 * \param width width of the block
2519 * \param height height of the block
2520 * \returns pattern for current coefficient group
2521 */
calcPatternSigCtx(const UInt * sigCoeffGroupFlag,UInt uiCGPosX,UInt uiCGPosY,UInt widthInGroups,UInt heightInGroups)2522 Int TComTrQuant::calcPatternSigCtx( const UInt* sigCoeffGroupFlag, UInt uiCGPosX, UInt uiCGPosY, UInt widthInGroups, UInt heightInGroups )
2523 {
2524 if ((widthInGroups <= 1) && (heightInGroups <= 1)) return 0;
2525
2526 const Bool rightAvailable = uiCGPosX < (widthInGroups - 1);
2527 const Bool belowAvailable = uiCGPosY < (heightInGroups - 1);
2528
2529 UInt sigRight = 0;
2530 UInt sigLower = 0;
2531
2532 if (rightAvailable) sigRight = ((sigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2533 if (belowAvailable) sigLower = ((sigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2534
2535 return sigRight + (sigLower << 1);
2536 }
2537
2538
2539 /** Context derivation process of coeff_abs_significant_flag
2540 * \param patternSigCtx pattern for current coefficient group
2541 * \param codingParameters coding parmeters for the TU (includes the scan)
2542 * \param scanPosition current position in scan order
2543 * \param log2BlockWidth log2 width of the block
2544 * \param log2BlockHeight log2 height of the block
2545 * \param ChannelType channel type (CHANNEL_TYPE_LUMA/CHROMA)
2546 * \returns ctxInc for current scan position
2547 */
getSigCtxInc(Int patternSigCtx,const TUEntropyCodingParameters & codingParameters,const Int scanPosition,const Int log2BlockWidth,const Int log2BlockHeight,const ChannelType chanType)2548 Int TComTrQuant::getSigCtxInc ( Int patternSigCtx,
2549 const TUEntropyCodingParameters &codingParameters,
2550 const Int scanPosition,
2551 const Int log2BlockWidth,
2552 const Int log2BlockHeight,
2553 const ChannelType chanType)
2554 {
2555 if (codingParameters.firstSignificanceMapContext == significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE])
2556 {
2557 //single context mode
2558 return significanceMapContextSetStart[chanType][CONTEXT_TYPE_SINGLE];
2559 }
2560
2561 const UInt rasterPosition = codingParameters.scan[scanPosition];
2562 const UInt posY = rasterPosition >> log2BlockWidth;
2563 const UInt posX = rasterPosition - (posY << log2BlockWidth);
2564
2565 if ((posX + posY) == 0) return 0; //special case for the DC context variable
2566
2567 Int offset = MAX_INT;
2568
2569 if ((log2BlockWidth == 2) && (log2BlockHeight == 2)) //4x4
2570 {
2571 offset = ctxIndMap4x4[ (4 * posY) + posX ];
2572 }
2573 else
2574 {
2575 Int cnt = 0;
2576
2577 switch (patternSigCtx)
2578 {
2579 //------------------
2580
2581 case 0: //neither neighbouring group is significant
2582 {
2583 const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2584 const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2585 const Int posTotalInSubset = posXinSubset + posYinSubset;
2586
2587 //first N coefficients in scan order use 2; the next few use 1; the rest use 0.
2588 const UInt context1Threshold = NEIGHBOURHOOD_00_CONTEXT_1_THRESHOLD_4x4;
2589 const UInt context2Threshold = NEIGHBOURHOOD_00_CONTEXT_2_THRESHOLD_4x4;
2590
2591 cnt = (posTotalInSubset >= context1Threshold) ? 0 : ((posTotalInSubset >= context2Threshold) ? 1 : 2);
2592 }
2593 break;
2594
2595 //------------------
2596
2597 case 1: //right group is significant, below is not
2598 {
2599 const Int posYinSubset = posY & ((1 << MLS_CG_LOG2_HEIGHT) - 1);
2600 const Int groupHeight = 1 << MLS_CG_LOG2_HEIGHT;
2601
2602 cnt = (posYinSubset >= (groupHeight >> 1)) ? 0 : ((posYinSubset >= (groupHeight >> 2)) ? 1 : 2); //top quarter uses 2; second-from-top quarter uses 1; bottom half uses 0
2603 }
2604 break;
2605
2606 //------------------
2607
2608 case 2: //below group is significant, right is not
2609 {
2610 const Int posXinSubset = posX & ((1 << MLS_CG_LOG2_WIDTH) - 1);
2611 const Int groupWidth = 1 << MLS_CG_LOG2_WIDTH;
2612
2613 cnt = (posXinSubset >= (groupWidth >> 1)) ? 0 : ((posXinSubset >= (groupWidth >> 2)) ? 1 : 2); //left quarter uses 2; second-from-left quarter uses 1; right half uses 0
2614 }
2615 break;
2616
2617 //------------------
2618
2619 case 3: //both neighbouring groups are significant
2620 {
2621 cnt = 2;
2622 }
2623 break;
2624
2625 //------------------
2626
2627 default:
2628 std::cerr << "ERROR: Invalid patternSigCtx \"" << Int(patternSigCtx) << "\" in getSigCtxInc" << std::endl;
2629 exit(1);
2630 break;
2631 }
2632
2633 //------------------------------------------------
2634
2635 const Bool notFirstGroup = ((posX >> MLS_CG_LOG2_WIDTH) + (posY >> MLS_CG_LOG2_HEIGHT)) > 0;
2636
2637 offset = (notFirstGroup ? notFirstGroupNeighbourhoodContextOffset[chanType] : 0) + cnt;
2638 }
2639
2640 return codingParameters.firstSignificanceMapContext + offset;
2641 }
2642
2643
2644 /** Get the best level in RD sense
2645 * \param rd64CodedCost reference to coded cost
2646 * \param rd64CodedCost0 reference to cost when coefficient is 0
2647 * \param rd64CodedCostSig reference to cost of significant coefficient
2648 * \param lLevelDouble reference to unscaled quantized level
2649 * \param uiMaxAbsLevel scaled quantized level
2650 * \param ui16CtxNumSig current ctxInc for coeff_abs_significant_flag
2651 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2652 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2653 * \param ui16AbsGoRice current Rice parameter for coeff_abs_level_minus3
2654 * \param iQBits quantization step size
2655 * \param dTemp correction factor
2656 * \param bLast indicates if the coefficient is the last significant
2657 * \returns best quantized transform level for given scan position
2658 * This method calculates the best quantized transform level for a given scan position.
2659 */
xGetCodedLevel(Double & rd64CodedCost,Double & rd64CodedCost0,Double & rd64CodedCostSig,Intermediate_Int lLevelDouble,UInt uiMaxAbsLevel,UShort ui16CtxNumSig,UShort ui16CtxNumOne,UShort ui16CtxNumAbs,UShort ui16AbsGoRice,UInt c1Idx,UInt c2Idx,Int iQBits,Double errorScale,Bool bLast,Bool useLimitedPrefixLength,ChannelType channelType) const2660 __inline UInt TComTrQuant::xGetCodedLevel ( Double& rd64CodedCost,
2661 Double& rd64CodedCost0,
2662 Double& rd64CodedCostSig,
2663 Intermediate_Int lLevelDouble,
2664 UInt uiMaxAbsLevel,
2665 UShort ui16CtxNumSig,
2666 UShort ui16CtxNumOne,
2667 UShort ui16CtxNumAbs,
2668 UShort ui16AbsGoRice,
2669 UInt c1Idx,
2670 UInt c2Idx,
2671 Int iQBits,
2672 Double errorScale,
2673 Bool bLast,
2674 Bool useLimitedPrefixLength,
2675 ChannelType channelType
2676 ) const
2677 {
2678 Double dCurrCostSig = 0;
2679 UInt uiBestAbsLevel = 0;
2680
2681 if( !bLast && uiMaxAbsLevel < 3 )
2682 {
2683 rd64CodedCostSig = xGetRateSigCoef( 0, ui16CtxNumSig );
2684 rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
2685 if( uiMaxAbsLevel == 0 )
2686 {
2687 return uiBestAbsLevel;
2688 }
2689 }
2690 else
2691 {
2692 rd64CodedCost = MAX_DOUBLE;
2693 }
2694
2695 if( !bLast )
2696 {
2697 dCurrCostSig = xGetRateSigCoef( 1, ui16CtxNumSig );
2698 }
2699
2700 UInt uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
2701 for( Int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
2702 {
2703 Double dErr = Double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
2704 Double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, ui16CtxNumOne, ui16CtxNumAbs, ui16AbsGoRice, c1Idx, c2Idx, useLimitedPrefixLength, channelType ) );
2705 dCurrCost += dCurrCostSig;
2706
2707 if( dCurrCost < rd64CodedCost )
2708 {
2709 uiBestAbsLevel = uiAbsLevel;
2710 rd64CodedCost = dCurrCost;
2711 rd64CodedCostSig = dCurrCostSig;
2712 }
2713 }
2714
2715 return uiBestAbsLevel;
2716 }
2717
2718 /** Calculates the cost for specific absolute transform level
2719 * \param uiAbsLevel scaled quantized level
2720 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2721 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2722 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
2723 * \returns cost of given absolute transform level
2724 */
xGetICRate(UInt uiAbsLevel,UShort ui16CtxNumOne,UShort ui16CtxNumAbs,UShort ui16AbsGoRice,UInt c1Idx,UInt c2Idx,Bool useLimitedPrefixLength,ChannelType channelType) const2725 __inline Int TComTrQuant::xGetICRate ( UInt uiAbsLevel,
2726 UShort ui16CtxNumOne,
2727 UShort ui16CtxNumAbs,
2728 UShort ui16AbsGoRice,
2729 UInt c1Idx,
2730 UInt c2Idx,
2731 Bool useLimitedPrefixLength,
2732 ChannelType channelType
2733 ) const
2734 {
2735 Int iRate = Int(xGetIEPRate()); // cost of sign bit
2736 UInt baseLevel = (c1Idx < C1FLAG_NUMBER) ? (2 + (c2Idx < C2FLAG_NUMBER)) : 1;
2737
2738 if ( uiAbsLevel >= baseLevel )
2739 {
2740 UInt symbol = uiAbsLevel - baseLevel;
2741 UInt length;
2742 if (symbol < (COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice))
2743 {
2744 length = symbol>>ui16AbsGoRice;
2745 iRate += (length+1+ui16AbsGoRice)<< 15;
2746 }
2747 else if (useLimitedPrefixLength)
2748 {
2749 const UInt maximumPrefixLength = (32 - (COEF_REMAIN_BIN_REDUCTION + g_maxTrDynamicRange[channelType]));
2750
2751 UInt prefixLength = 0;
2752 UInt suffix = (symbol >> ui16AbsGoRice) - COEF_REMAIN_BIN_REDUCTION;
2753
2754 while ((prefixLength < maximumPrefixLength) && (suffix > ((2 << prefixLength) - 2)))
2755 {
2756 prefixLength++;
2757 }
2758
2759 const UInt suffixLength = (prefixLength == maximumPrefixLength) ? (g_maxTrDynamicRange[channelType] - ui16AbsGoRice) : (prefixLength + 1/*separator*/);
2760
2761 iRate += (COEF_REMAIN_BIN_REDUCTION + prefixLength + suffixLength + ui16AbsGoRice) << 15;
2762 }
2763 else
2764 {
2765 length = ui16AbsGoRice;
2766 symbol = symbol - ( COEF_REMAIN_BIN_REDUCTION << ui16AbsGoRice);
2767 while (symbol >= (1<<length))
2768 {
2769 symbol -= (1<<(length++));
2770 }
2771 iRate += (COEF_REMAIN_BIN_REDUCTION+length+1-ui16AbsGoRice+length)<< 15;
2772 }
2773
2774 if (c1Idx < C1FLAG_NUMBER)
2775 {
2776 iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2777
2778 if (c2Idx < C2FLAG_NUMBER)
2779 {
2780 iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 1 ];
2781 }
2782 }
2783 }
2784 else if( uiAbsLevel == 1 )
2785 {
2786 iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 0 ];
2787 }
2788 else if( uiAbsLevel == 2 )
2789 {
2790 iRate += m_pcEstBitsSbac->m_greaterOneBits[ ui16CtxNumOne ][ 1 ];
2791 iRate += m_pcEstBitsSbac->m_levelAbsBits[ ui16CtxNumAbs ][ 0 ];
2792 }
2793 else
2794 {
2795 iRate = 0;
2796 }
2797
2798 return iRate;
2799 }
2800
xGetRateSigCoeffGroup(UShort uiSignificanceCoeffGroup,UShort ui16CtxNumSig) const2801 __inline Double TComTrQuant::xGetRateSigCoeffGroup ( UShort uiSignificanceCoeffGroup,
2802 UShort ui16CtxNumSig ) const
2803 {
2804 return xGetICost( m_pcEstBitsSbac->significantCoeffGroupBits[ ui16CtxNumSig ][ uiSignificanceCoeffGroup ] );
2805 }
2806
2807 /** Calculates the cost of signaling the last significant coefficient in the block
2808 * \param uiPosX X coordinate of the last significant coefficient
2809 * \param uiPosY Y coordinate of the last significant coefficient
2810 * \returns cost of last significant coefficient
2811 */
2812 /*
2813 * \param uiWidth width of the transform unit (TU)
2814 */
xGetRateLast(const UInt uiPosX,const UInt uiPosY,const ComponentID component) const2815 __inline Double TComTrQuant::xGetRateLast ( const UInt uiPosX,
2816 const UInt uiPosY,
2817 const ComponentID component ) const
2818 {
2819 UInt uiCtxX = g_uiGroupIdx[uiPosX];
2820 UInt uiCtxY = g_uiGroupIdx[uiPosY];
2821
2822 Double uiCost = m_pcEstBitsSbac->lastXBits[toChannelType(component)][ uiCtxX ] + m_pcEstBitsSbac->lastYBits[toChannelType(component)][ uiCtxY ];
2823
2824 if( uiCtxX > 3 )
2825 {
2826 uiCost += xGetIEPRate() * ((uiCtxX-2)>>1);
2827 }
2828 if( uiCtxY > 3 )
2829 {
2830 uiCost += xGetIEPRate() * ((uiCtxY-2)>>1);
2831 }
2832 return xGetICost( uiCost );
2833 }
2834
2835 /** Calculates the cost for specific absolute transform level
2836 * \param uiAbsLevel scaled quantized level
2837 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
2838 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
2839 * \param ui16CtxBase current global offset for coeff_abs_level_greater1 and coeff_abs_level_greater2
2840 * \returns cost of given absolute transform level
2841 */
xGetRateSigCoef(UShort uiSignificance,UShort ui16CtxNumSig) const2842 __inline Double TComTrQuant::xGetRateSigCoef ( UShort uiSignificance,
2843 UShort ui16CtxNumSig ) const
2844 {
2845 return xGetICost( m_pcEstBitsSbac->significantBits[ ui16CtxNumSig ][ uiSignificance ] );
2846 }
2847
2848 /** Get the cost for a specific rate
2849 * \param dRate rate of a bit
2850 * \returns cost at the specific rate
2851 */
xGetICost(Double dRate) const2852 __inline Double TComTrQuant::xGetICost ( Double dRate ) const
2853 {
2854 return m_dLambda * dRate;
2855 }
2856
2857 /** Get the cost of an equal probable bit
2858 * \returns cost of equal probable bit
2859 */
xGetIEPRate() const2860 __inline Double TComTrQuant::xGetIEPRate ( ) const
2861 {
2862 return 32768;
2863 }
2864
2865 /** Context derivation process of coeff_abs_significant_flag
2866 * \param uiSigCoeffGroupFlag significance map of L1
2867 * \param uiBlkX column of current scan position
2868 * \param uiBlkY row of current scan position
2869 * \param uiLog2BlkSize log2 value of block size
2870 * \returns ctxInc for current scan position
2871 */
getSigCoeffGroupCtxInc(const UInt * uiSigCoeffGroupFlag,const UInt uiCGPosX,const UInt uiCGPosY,const UInt widthInGroups,const UInt heightInGroups)2872 UInt TComTrQuant::getSigCoeffGroupCtxInc (const UInt* uiSigCoeffGroupFlag,
2873 const UInt uiCGPosX,
2874 const UInt uiCGPosY,
2875 const UInt widthInGroups,
2876 const UInt heightInGroups)
2877 {
2878 UInt sigRight = 0;
2879 UInt sigLower = 0;
2880
2881 if (uiCGPosX < (widthInGroups - 1)) sigRight = ((uiSigCoeffGroupFlag[ (uiCGPosY * widthInGroups) + uiCGPosX + 1 ] != 0) ? 1 : 0);
2882 if (uiCGPosY < (heightInGroups - 1)) sigLower = ((uiSigCoeffGroupFlag[ (uiCGPosY + 1) * widthInGroups + uiCGPosX ] != 0) ? 1 : 0);
2883
2884 return ((sigRight + sigLower) != 0) ? 1 : 0;
2885 }
2886
2887
2888 /** set quantized matrix coefficient for encode
2889 * \param scalingList quantaized matrix address
2890 */
setScalingList(TComScalingList * scalingList,const ChromaFormat format)2891 Void TComTrQuant::setScalingList(TComScalingList *scalingList, const ChromaFormat format)
2892 {
2893 const Int minimumQp = 0;
2894 const Int maximumQp = SCALING_LIST_REM_NUM;
2895
2896 for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2897 {
2898 for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2899 {
2900 for(Int qp = minimumQp; qp < maximumQp; qp++)
2901 {
2902 xSetScalingListEnc(scalingList,list,size,qp,format);
2903 xSetScalingListDec(scalingList,list,size,qp,format);
2904 setErrScaleCoeff(list,size,qp);
2905 }
2906 }
2907 }
2908 }
2909 /** set quantized matrix coefficient for decode
2910 * \param scalingList quantaized matrix address
2911 */
setScalingListDec(TComScalingList * scalingList,const ChromaFormat format)2912 Void TComTrQuant::setScalingListDec(TComScalingList *scalingList, const ChromaFormat format)
2913 {
2914 const Int minimumQp = 0;
2915 const Int maximumQp = SCALING_LIST_REM_NUM;
2916
2917 for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
2918 {
2919 for(UInt list = 0; list < SCALING_LIST_NUM; list++)
2920 {
2921 for(Int qp = minimumQp; qp < maximumQp; qp++)
2922 {
2923 xSetScalingListDec(scalingList,list,size,qp,format);
2924 }
2925 }
2926 }
2927 }
2928 /** set error scale coefficients
2929 * \param list List ID
2930 * \param uiSize Size
2931 * \param uiQP Quantization parameter
2932 */
setErrScaleCoeff(UInt list,UInt size,Int qp)2933 Void TComTrQuant::setErrScaleCoeff(UInt list, UInt size, Int qp)
2934 {
2935 const UInt uiLog2TrSize = g_aucConvertToBit[ g_scalingListSizeX[size] ] + 2;
2936 const ChannelType channelType = ((list == 0) || (list == MAX_NUM_COMPONENT)) ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
2937
2938 const Int iTransformShift = getTransformShift(channelType, uiLog2TrSize); // Represents scaling through forward transform
2939
2940 UInt i,uiMaxNumCoeff = g_scalingListSize[size];
2941 Int *piQuantcoeff;
2942 Double *pdErrScale;
2943 piQuantcoeff = getQuantCoeff(list, qp,size);
2944 pdErrScale = getErrScaleCoeff(list, size, qp);
2945
2946 Double dErrScale = (Double)(1<<SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function
2947 dErrScale = dErrScale*pow(2.0,(-2.0*iTransformShift)); // Compensate for scaling through forward transform
2948
2949 for(i=0;i<uiMaxNumCoeff;i++)
2950 {
2951 pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2952 }
2953
2954 getErrScaleCoeffNoScalingList(list, size, qp) = dErrScale / g_quantScales[qp] / g_quantScales[qp] / (1 << DISTORTION_PRECISION_ADJUSTMENT(2 * (g_bitDepth[channelType] - 8)));
2955 }
2956
2957 /** set quantized matrix coefficient for encode
2958 * \param scalingList quantaized matrix address
2959 * \param listId List index
2960 * \param sizeId size index
2961 * \param uiQP Quantization parameter
2962 */
xSetScalingListEnc(TComScalingList * scalingList,UInt listId,UInt sizeId,Int qp,const ChromaFormat format)2963 Void TComTrQuant::xSetScalingListEnc(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2964 {
2965 UInt width = g_scalingListSizeX[sizeId];
2966 UInt height = g_scalingListSizeX[sizeId];
2967 UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2968 Int *quantcoeff;
2969 Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2970 quantcoeff = getQuantCoeff(listId, qp, sizeId);
2971
2972 Int quantScales = g_quantScales[qp];
2973
2974 processScalingListEnc(coeff,
2975 quantcoeff,
2976 (quantScales << LOG2_SCALING_LIST_NEUTRAL_VALUE),
2977 height, width, ratio,
2978 min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
2979 scalingList->getScalingListDC(sizeId,listId));
2980 }
2981
2982 /** set quantized matrix coefficient for decode
2983 * \param scalingList quantaized matrix address
2984 * \param list List index
2985 * \param size size index
2986 * \param uiQP Quantization parameter
2987 */
xSetScalingListDec(TComScalingList * scalingList,UInt listId,UInt sizeId,Int qp,const ChromaFormat format)2988 Void TComTrQuant::xSetScalingListDec(TComScalingList *scalingList, UInt listId, UInt sizeId, Int qp, const ChromaFormat format)
2989 {
2990 UInt width = g_scalingListSizeX[sizeId];
2991 UInt height = g_scalingListSizeX[sizeId];
2992 UInt ratio = g_scalingListSizeX[sizeId]/min(MAX_MATRIX_SIZE_NUM,(Int)g_scalingListSizeX[sizeId]);
2993 Int *dequantcoeff;
2994 Int *coeff = scalingList->getScalingListAddress(sizeId,listId);
2995
2996 dequantcoeff = getDequantCoeff(listId, qp, sizeId);
2997
2998 Int invQuantScale = g_invQuantScales[qp];
2999
3000 processScalingListDec(coeff,
3001 dequantcoeff,
3002 invQuantScale,
3003 height, width, ratio,
3004 min(MAX_MATRIX_SIZE_NUM, (Int)g_scalingListSizeX[sizeId]),
3005 scalingList->getScalingListDC(sizeId,listId));
3006 }
3007
3008 /** set flat matrix value to quantized coefficient
3009 */
setFlatScalingList(const ChromaFormat format)3010 Void TComTrQuant::setFlatScalingList(const ChromaFormat format)
3011 {
3012 const Int minimumQp = 0;
3013 const Int maximumQp = SCALING_LIST_REM_NUM;
3014
3015 for(UInt size = 0; size < SCALING_LIST_SIZE_NUM; size++)
3016 {
3017 for(UInt list = 0; list < SCALING_LIST_NUM; list++)
3018 {
3019 for(Int qp = minimumQp; qp < maximumQp; qp++)
3020 {
3021 xsetFlatScalingList(list,size,qp,format);
3022 setErrScaleCoeff(list,size,qp);
3023 }
3024 }
3025 }
3026 }
3027
3028 /** set flat matrix value to quantized coefficient
3029 * \param list List ID
3030 * \param uiQP Quantization parameter
3031 * \param uiSize Size
3032 */
xsetFlatScalingList(UInt list,UInt size,Int qp,const ChromaFormat format)3033 Void TComTrQuant::xsetFlatScalingList(UInt list, UInt size, Int qp, const ChromaFormat format)
3034 {
3035 UInt i,num = g_scalingListSize[size];
3036 Int *quantcoeff;
3037 Int *dequantcoeff;
3038
3039 Int quantScales = g_quantScales [qp];
3040 Int invQuantScales = g_invQuantScales[qp] << 4;
3041
3042 quantcoeff = getQuantCoeff(list, qp, size);
3043 dequantcoeff = getDequantCoeff(list, qp, size);
3044
3045 for(i=0;i<num;i++)
3046 {
3047 *quantcoeff++ = quantScales;
3048 *dequantcoeff++ = invQuantScales;
3049 }
3050 }
3051
3052 /** set quantized matrix coefficient for encode
3053 * \param coeff quantaized matrix address
3054 * \param quantcoeff quantaized matrix address
3055 * \param quantScales Q(QP%6)
3056 * \param height height
3057 * \param width width
3058 * \param ratio ratio for upscale
3059 * \param sizuNum matrix size
3060 * \param dc dc parameter
3061 */
processScalingListEnc(Int * coeff,Int * quantcoeff,Int quantScales,UInt height,UInt width,UInt ratio,Int sizuNum,UInt dc)3062 Void TComTrQuant::processScalingListEnc( Int *coeff, Int *quantcoeff, Int quantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3063 {
3064 for(UInt j=0;j<height;j++)
3065 {
3066 for(UInt i=0;i<width;i++)
3067 {
3068 quantcoeff[j*width + i] = quantScales / coeff[sizuNum * (j / ratio) + i / ratio];
3069 }
3070 }
3071
3072 if(ratio > 1)
3073 {
3074 quantcoeff[0] = quantScales / dc;
3075 }
3076 }
3077
3078 /** set quantized matrix coefficient for decode
3079 * \param coeff quantaized matrix address
3080 * \param dequantcoeff quantaized matrix address
3081 * \param invQuantScales IQ(QP%6))
3082 * \param height height
3083 * \param width width
3084 * \param ratio ratio for upscale
3085 * \param sizuNum matrix size
3086 * \param dc dc parameter
3087 */
processScalingListDec(Int * coeff,Int * dequantcoeff,Int invQuantScales,UInt height,UInt width,UInt ratio,Int sizuNum,UInt dc)3088 Void TComTrQuant::processScalingListDec( Int *coeff, Int *dequantcoeff, Int invQuantScales, UInt height, UInt width, UInt ratio, Int sizuNum, UInt dc)
3089 {
3090 for(UInt j=0;j<height;j++)
3091 {
3092 for(UInt i=0;i<width;i++)
3093 {
3094 dequantcoeff[j*width + i] = invQuantScales * coeff[sizuNum * (j / ratio) + i / ratio];
3095 }
3096 }
3097
3098 if(ratio > 1)
3099 {
3100 dequantcoeff[0] = invQuantScales * dc;
3101 }
3102 }
3103
3104 /** initialization process of scaling list array
3105 */
initScalingList()3106 Void TComTrQuant::initScalingList()
3107 {
3108 for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3109 {
3110 for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3111 {
3112 for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3113 {
3114 m_quantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3115 m_dequantCoef [sizeId][listId][qp] = new Int [g_scalingListSize[sizeId]];
3116 m_errScale [sizeId][listId][qp] = new Double [g_scalingListSize[sizeId]];
3117 } // listID loop
3118 }
3119 }
3120 }
3121
3122 /** destroy quantization matrix array
3123 */
destroyScalingList()3124 Void TComTrQuant::destroyScalingList()
3125 {
3126 for(UInt sizeId = 0; sizeId < SCALING_LIST_SIZE_NUM; sizeId++)
3127 {
3128 for(UInt listId = 0; listId < SCALING_LIST_NUM; listId++)
3129 {
3130 for(UInt qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
3131 {
3132 if(m_quantCoef [sizeId][listId][qp]) delete [] m_quantCoef [sizeId][listId][qp];
3133 if(m_dequantCoef [sizeId][listId][qp]) delete [] m_dequantCoef [sizeId][listId][qp];
3134 if(m_errScale [sizeId][listId][qp]) delete [] m_errScale [sizeId][listId][qp];
3135 }
3136 }
3137 }
3138 }
3139
transformSkipQuantOneSample(TComTU & rTu,const ComponentID compID,const Pel resiDiff,TCoeff * pcCoeff,const UInt uiPos,const QpParam & cQP,const Bool bUseHalfRoundingPoint)3140 Void TComTrQuant::transformSkipQuantOneSample(TComTU &rTu, const ComponentID compID, const Pel resiDiff, TCoeff* pcCoeff, const UInt uiPos, const QpParam &cQP, const Bool bUseHalfRoundingPoint)
3141 {
3142 TComDataCU *pcCU = rTu.getCU();
3143 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3144 const TComRectangle &rect = rTu.getRect(compID);
3145 const UInt uiWidth = rect.width;
3146 const UInt uiHeight = rect.height;
3147 const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3148 const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3149 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3150 const Int defaultQuantisationCoefficient = g_quantScales[cQP.rem];
3151
3152 assert( scalingListType < SCALING_LIST_NUM );
3153 const Int *const piQuantCoeff = getQuantCoeff( scalingListType, cQP.rem, (rTu.GetEquivalentLog2TrSize(compID)-2) );
3154
3155
3156 /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
3157 * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
3158 * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
3159 * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
3160 */
3161
3162 const Int iQBits = QUANT_SHIFT + cQP.per + iTransformShift;
3163 // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
3164
3165 const Int iAdd = ( bUseHalfRoundingPoint ? 256 : (pcCU->getSlice()->getSliceType() == I_SLICE ? 171 : 85) ) << (iQBits - 9);
3166
3167 TCoeff transformedCoefficient;
3168
3169 // transform-skip
3170 if (iTransformShift >= 0)
3171 {
3172 transformedCoefficient = resiDiff << iTransformShift;
3173 }
3174 else // for very high bit depths
3175 {
3176 const Int iTrShiftNeg = -iTransformShift;
3177 const Int offset = 1 << (iTrShiftNeg - 1);
3178 transformedCoefficient = ( resiDiff + offset ) >> iTrShiftNeg;
3179 }
3180
3181 // quantization
3182 const TCoeff iSign = (transformedCoefficient < 0 ? -1: 1);
3183
3184 const Int quantisationCoefficient = enableScalingLists ? piQuantCoeff[uiPos] : defaultQuantisationCoefficient;
3185
3186 const Int64 tmpLevel = (Int64)abs(transformedCoefficient) * quantisationCoefficient;
3187
3188 const TCoeff quantisedCoefficient = (TCoeff((tmpLevel + iAdd ) >> iQBits)) * iSign;
3189
3190 const TCoeff entropyCodingMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3191 const TCoeff entropyCodingMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3192 pcCoeff[ uiPos ] = Clip3<TCoeff>( entropyCodingMinimum, entropyCodingMaximum, quantisedCoefficient );
3193 }
3194
3195
invTrSkipDeQuantOneSample(TComTU & rTu,ComponentID compID,TCoeff inSample,Pel & reconSample,const QpParam & cQP,UInt uiPos)3196 Void TComTrQuant::invTrSkipDeQuantOneSample( TComTU &rTu, ComponentID compID, TCoeff inSample, Pel &reconSample, const QpParam &cQP, UInt uiPos )
3197 {
3198 TComDataCU *pcCU = rTu.getCU();
3199 const UInt uiAbsPartIdx = rTu.GetAbsPartIdxTU();
3200 const TComRectangle &rect = rTu.getRect(compID);
3201 const UInt uiWidth = rect.width;
3202 const UInt uiHeight = rect.height;
3203 const Int QP_per = cQP.per;
3204 const Int QP_rem = cQP.rem;
3205 const Int iTransformShift = getTransformShift(toChannelType(compID), rTu.GetEquivalentLog2TrSize(compID));
3206 const Int scalingListType = getScalingListType(pcCU->getPredictionMode(uiAbsPartIdx), compID);
3207 const Bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, true);
3208 const UInt uiLog2TrSize = rTu.GetEquivalentLog2TrSize(compID);
3209
3210 assert( scalingListType < SCALING_LIST_NUM );
3211
3212 const Int rightShift = (IQUANT_SHIFT - (iTransformShift + QP_per)) + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
3213
3214 const TCoeff transformMinimum = -(1 << g_maxTrDynamicRange[toChannelType(compID)]);
3215 const TCoeff transformMaximum = (1 << g_maxTrDynamicRange[toChannelType(compID)]) - 1;
3216
3217 // Dequantisation
3218
3219 TCoeff dequantisedSample;
3220
3221 if(enableScalingLists)
3222 {
3223 const UInt dequantCoefBits = 1 + IQUANT_SHIFT + SCALING_LIST_BITS;
3224 const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - dequantCoefBits));
3225
3226 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3227 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3228
3229 Int *piDequantCoef = getDequantCoeff(scalingListType,QP_rem,uiLog2TrSize-2);
3230
3231 if(rightShift > 0)
3232 {
3233 const Intermediate_Int iAdd = 1 << (rightShift - 1);
3234 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3235 const Intermediate_Int iCoeffQ = ((Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) + iAdd ) >> rightShift;
3236
3237 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3238 }
3239 else
3240 {
3241 const Int leftShift = -rightShift;
3242 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3243 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * piDequantCoef[uiPos]) << leftShift;
3244
3245 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3246 }
3247 }
3248 else
3249 {
3250 const Int scale = g_invQuantScales[QP_rem];
3251 const Int scaleBits = (IQUANT_SHIFT + 1) ;
3252
3253 const UInt targetInputBitDepth = std::min<UInt>((g_maxTrDynamicRange[toChannelType(compID)] + 1), (((sizeof(Intermediate_Int) * 8) + rightShift) - scaleBits));
3254 const Intermediate_Int inputMinimum = -(1 << (targetInputBitDepth - 1));
3255 const Intermediate_Int inputMaximum = (1 << (targetInputBitDepth - 1)) - 1;
3256
3257 if (rightShift > 0)
3258 {
3259 const Intermediate_Int iAdd = 1 << (rightShift - 1);
3260 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3261 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale + iAdd) >> rightShift;
3262
3263 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3264 }
3265 else
3266 {
3267 const Int leftShift = -rightShift;
3268 const TCoeff clipQCoef = TCoeff(Clip3<Intermediate_Int>(inputMinimum, inputMaximum, inSample));
3269 const Intermediate_Int iCoeffQ = (Intermediate_Int(clipQCoef) * scale) << leftShift;
3270
3271 dequantisedSample = TCoeff(Clip3<Intermediate_Int>(transformMinimum,transformMaximum,iCoeffQ));
3272 }
3273 }
3274
3275 // Inverse transform-skip
3276
3277 if (iTransformShift >= 0)
3278 {
3279 const TCoeff offset = iTransformShift==0 ? 0 : (1 << (iTransformShift - 1));
3280 reconSample = Pel(( dequantisedSample + offset ) >> iTransformShift);
3281 }
3282 else //for very high bit depths
3283 {
3284 const Int iTrShiftNeg = -iTransformShift;
3285 reconSample = Pel(dequantisedSample << iTrShiftNeg);
3286 }
3287 }
3288
3289
crossComponentPrediction(TComTU & rTu,const ComponentID compID,const Pel * piResiL,const Pel * piResiC,Pel * piResiT,const Int width,const Int height,const Int strideL,const Int strideC,const Int strideT,const Bool reverse)3290 Void TComTrQuant::crossComponentPrediction( TComTU & rTu,
3291 const ComponentID compID,
3292 const Pel * piResiL,
3293 const Pel * piResiC,
3294 Pel * piResiT,
3295 const Int width,
3296 const Int height,
3297 const Int strideL,
3298 const Int strideC,
3299 const Int strideT,
3300 const Bool reverse )
3301 {
3302 const Pel *pResiL = piResiL;
3303 const Pel *pResiC = piResiC;
3304 Pel *pResiT = piResiT;
3305
3306 TComDataCU *pCU = rTu.getCU();
3307 const Char alpha = pCU->getCrossComponentPredictionAlpha( rTu.GetAbsPartIdxTU( compID ), compID );
3308 const Int diffBitDepth = pCU->getSlice()->getSPS()->getDifferentialLumaChromaBitDepth();
3309
3310 for( Int y = 0; y < height; y++ )
3311 {
3312 if (reverse)
3313 {
3314 for( Int x = 0; x < width; x++ )
3315 {
3316 pResiT[x] = pResiC[x] + (( alpha * rightShift( pResiL[x], diffBitDepth) ) >> 3);
3317 }
3318 }
3319 else
3320 {
3321 for( Int x = 0; x < width; x++ )
3322 {
3323 pResiT[x] = pResiC[x] - (( alpha * rightShift(pResiL[x], diffBitDepth) ) >> 3);
3324 }
3325 }
3326
3327 pResiL += strideL;
3328 pResiC += strideC;
3329 pResiT += strideT;
3330 }
3331 }
3332
3333 //! \}
3334