1 /* The copyright in this software is being made available under the BSD
2  * License, included below. This software may be subject to other third party
3  * and contributor rights, including patent rights, and no such rights are
4  * granted under this license.
5  *
6  * Copyright (c) 2010-2014, ITU/ISO/IEC
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  *
12  *  * Redistributions of source code must retain the above copyright notice,
13  *    this list of conditions and the following disclaimer.
14  *  * Redistributions in binary form must reproduce the above copyright notice,
15  *    this list of conditions and the following disclaimer in the documentation
16  *    and/or other materials provided with the distribution.
17  *  * Neither the name of the ITU/ISO/IEC nor the names of its contributors may
18  *    be used to endorse or promote products derived from this software without
19  *    specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
31  * THE POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /** \file     TComRdCost.cpp
35     \brief    RD cost computation class
36 */
37 
38 #include <math.h>
39 #include <assert.h>
40 #include "TComRom.h"
41 #include "TComRdCost.h"
42 
43 //! \ingroup TLibCommon
44 //! \{
45 
TComRdCost()46 TComRdCost::TComRdCost()
47 {
48   init();
49 }
50 
~TComRdCost()51 TComRdCost::~TComRdCost()
52 {
53 }
54 
55 // Calculate RD functions
calcRdCost(UInt uiBits,Distortion uiDistortion,Bool bFlag,DFunc eDFunc)56 Double TComRdCost::calcRdCost( UInt uiBits, Distortion uiDistortion, Bool bFlag, DFunc eDFunc )
57 {
58   Double dRdCost = 0.0;
59   Double dLambda = 0.0;
60 
61   switch ( eDFunc )
62   {
63     case DF_SSE:
64       assert(0);
65       break;
66     case DF_SAD:
67 #if RExt__HIGH_BIT_DEPTH_SUPPORT
68       dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
69 #else
70       dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
71 #endif
72       break;
73     case DF_DEFAULT:
74       dLambda =         m_dLambda;
75       break;
76     case DF_SSE_FRAME:
77       dLambda =         m_dFrameLambda;
78       break;
79     default:
80       assert (0);
81       break;
82   }
83 
84   if (bFlag) //NOTE: this "bFlag" is never true
85   {
86     // Intra8x8, Intra4x4 Block only...
87     if (m_costMode != COST_STANDARD_LOSSY)
88     {
89       dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
90     }
91     else
92     {
93       dRdCost = (((Double)uiDistortion) + ((Double)uiBits * dLambda));
94     }
95   }
96   else
97   {
98     if (eDFunc == DF_SAD)
99     {
100       if (m_costMode != COST_STANDARD_LOSSY)
101       {
102         dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
103       }
104       else
105       {
106         dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
107       }
108     }
109     else
110     {
111       if (m_costMode != COST_STANDARD_LOSSY)
112       {
113         dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
114       }
115       else
116       {
117         dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
118       }
119     }
120   }
121 
122   return dRdCost;
123 }
124 
calcRdCost64(UInt64 uiBits,UInt64 uiDistortion,Bool bFlag,DFunc eDFunc)125 Double TComRdCost::calcRdCost64( UInt64 uiBits, UInt64 uiDistortion, Bool bFlag, DFunc eDFunc )
126 {
127   Double dRdCost = 0.0;
128   Double dLambda = 0.0;
129 
130   switch ( eDFunc )
131   {
132     case DF_SSE:
133       assert(0);
134       break;
135     case DF_SAD:
136 #if RExt__HIGH_BIT_DEPTH_SUPPORT
137       dLambda = m_dLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
138 #else
139       dLambda = (Double)m_uiLambdaMotionSAD[0]; // 0 is valid, because for lossless blocks, the cost equation is modified to compensate.
140 #endif
141       break;
142     case DF_DEFAULT:
143       dLambda =         m_dLambda;
144       break;
145     case DF_SSE_FRAME:
146       dLambda =         m_dFrameLambda;
147       break;
148     default:
149       assert (0);
150       break;
151   }
152 
153   if (bFlag) //NOTE: this "bFlag" is never true
154   {
155     // Intra8x8, Intra4x4 Block only...
156     if (m_costMode != COST_STANDARD_LOSSY)
157     {
158       dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
159     }
160     else
161     {
162       dRdCost = (((Double)(Int64)uiDistortion) + ((Double)(Int64)uiBits * dLambda));
163     }
164   }
165   else
166   {
167     if (eDFunc == DF_SAD)
168     {
169       if (m_costMode != COST_STANDARD_LOSSY)
170       {
171         dRdCost = ((Double(uiDistortion) * 65536) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
172       }
173       else
174       {
175         dRdCost = floor(Double(uiDistortion) + (floor((Double(uiBits) * dLambda) + 0.5) / 65536.0));
176       }
177     }
178     else
179     {
180       if (m_costMode != COST_STANDARD_LOSSY)
181       {
182         dRdCost = (Double(uiDistortion) / dLambda) + Double(uiBits); // all lossless costs would have uiDistortion=0, and therefore this cost function can be used.
183       }
184       else
185       {
186         dRdCost = floor(Double(uiDistortion) + (Double(uiBits) * dLambda) + 0.5);
187       }
188     }
189   }
190 
191   return dRdCost;
192 }
193 
setLambda(Double dLambda)194 Void TComRdCost::setLambda( Double dLambda )
195 {
196   m_dLambda           = dLambda;
197   m_sqrtLambda        = sqrt(m_dLambda);
198 #if RExt__HIGH_BIT_DEPTH_SUPPORT
199   m_dLambdaMotionSAD[0] = 65536.0 * m_sqrtLambda;
200   m_dLambdaMotionSSE[0] = 65536.0 * m_dLambda;
201 #if FULL_NBIT
202   dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
203 #else
204   dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
205 #endif
206   m_dLambdaMotionSAD[1] = 65536.0 * sqrt(dLambda);
207   m_dLambdaMotionSSE[1] = 65536.0 * dLambda;
208 #else
209   m_uiLambdaMotionSAD[0] = (UInt)floor(65536.0 * m_sqrtLambda);
210   m_uiLambdaMotionSSE[0] = (UInt)floor(65536.0 * m_dLambda   );
211 #if FULL_NBIT
212   dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12) / 3.0));
213 #else
214   dLambda = 0.57 * pow(2.0, ((LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP_PRIME - 12 - 6 * (g_bitDepth[CHANNEL_TYPE_LUMA] - 8)) / 3.0));
215 #endif
216   m_uiLambdaMotionSAD[1] = (UInt)floor(65536.0 * sqrt(dLambda));
217   m_uiLambdaMotionSSE[1] = (UInt)floor(65536.0 * dLambda   );
218 #endif
219 }
220 
221 
// Initialize Function Pointer by [eDFunc]
init()223 Void TComRdCost::init()
224 {
225   m_afpDistortFunc[DF_DEFAULT] = NULL;                  // for DF_DEFAULT
226 
227   m_afpDistortFunc[DF_SSE    ] = TComRdCost::xGetSSE;
228   m_afpDistortFunc[DF_SSE4   ] = TComRdCost::xGetSSE4;
229   m_afpDistortFunc[DF_SSE8   ] = TComRdCost::xGetSSE8;
230   m_afpDistortFunc[DF_SSE16  ] = TComRdCost::xGetSSE16;
231   m_afpDistortFunc[DF_SSE32  ] = TComRdCost::xGetSSE32;
232   m_afpDistortFunc[DF_SSE64  ] = TComRdCost::xGetSSE64;
233   m_afpDistortFunc[DF_SSE16N ] = TComRdCost::xGetSSE16N;
234 
235   m_afpDistortFunc[DF_SAD    ] = TComRdCost::xGetSAD;
236   m_afpDistortFunc[DF_SAD4   ] = TComRdCost::xGetSAD4;
237   m_afpDistortFunc[DF_SAD8   ] = TComRdCost::xGetSAD8;
238   m_afpDistortFunc[DF_SAD16  ] = TComRdCost::xGetSAD16;
239   m_afpDistortFunc[DF_SAD32  ] = TComRdCost::xGetSAD32;
240   m_afpDistortFunc[DF_SAD64  ] = TComRdCost::xGetSAD64;
241   m_afpDistortFunc[DF_SAD16N ] = TComRdCost::xGetSAD16N;
242 
243   m_afpDistortFunc[DF_SADS   ] = TComRdCost::xGetSAD;
244   m_afpDistortFunc[DF_SADS4  ] = TComRdCost::xGetSAD4;
245   m_afpDistortFunc[DF_SADS8  ] = TComRdCost::xGetSAD8;
246   m_afpDistortFunc[DF_SADS16 ] = TComRdCost::xGetSAD16;
247   m_afpDistortFunc[DF_SADS32 ] = TComRdCost::xGetSAD32;
248   m_afpDistortFunc[DF_SADS64 ] = TComRdCost::xGetSAD64;
249   m_afpDistortFunc[DF_SADS16N] = TComRdCost::xGetSAD16N;
250 
251 #if AMP_SAD
252   m_afpDistortFunc[DF_SAD12  ] = TComRdCost::xGetSAD12;
253   m_afpDistortFunc[DF_SAD24  ] = TComRdCost::xGetSAD24;
254   m_afpDistortFunc[DF_SAD48  ] = TComRdCost::xGetSAD48;
255 
256   m_afpDistortFunc[DF_SADS12 ] = TComRdCost::xGetSAD12;
257   m_afpDistortFunc[DF_SADS24 ] = TComRdCost::xGetSAD24;
258   m_afpDistortFunc[DF_SADS48 ] = TComRdCost::xGetSAD48;
259 #endif
260   m_afpDistortFunc[DF_HADS   ] = TComRdCost::xGetHADs;
261   m_afpDistortFunc[DF_HADS4  ] = TComRdCost::xGetHADs;
262   m_afpDistortFunc[DF_HADS8  ] = TComRdCost::xGetHADs;
263   m_afpDistortFunc[DF_HADS16 ] = TComRdCost::xGetHADs;
264   m_afpDistortFunc[DF_HADS32 ] = TComRdCost::xGetHADs;
265   m_afpDistortFunc[DF_HADS64 ] = TComRdCost::xGetHADs;
266   m_afpDistortFunc[DF_HADS16N] = TComRdCost::xGetHADs;
267 
268   m_costMode                   = COST_STANDARD_LOSSY;
269 
270 #if RExt__HIGH_BIT_DEPTH_SUPPORT
271   m_dCost                      = 0;
272 #else
273   m_uiCost                     = 0;
274 #endif
275   m_iCostScale                 = 0;
276 }
277 
xGetComponentBits(Int iVal)278 UInt TComRdCost::xGetComponentBits( Int iVal )
279 {
280   UInt uiLength = 1;
281   UInt uiTemp   = ( iVal <= 0) ? (-iVal<<1)+1: (iVal<<1);
282 
283   assert ( uiTemp );
284 
285   while ( 1 != uiTemp )
286   {
287     uiTemp >>= 1;
288     uiLength += 2;
289   }
290 
291   return uiLength;
292 }
293 
setDistParam(UInt uiBlkWidth,UInt uiBlkHeight,DFunc eDFunc,DistParam & rcDistParam)294 Void TComRdCost::setDistParam( UInt uiBlkWidth, UInt uiBlkHeight, DFunc eDFunc, DistParam& rcDistParam )
295 {
296   // set Block Width / Height
297   rcDistParam.iCols    = uiBlkWidth;
298   rcDistParam.iRows    = uiBlkHeight;
299   rcDistParam.DistFunc = m_afpDistortFunc[eDFunc + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
300 
301   // initialize
302   rcDistParam.iSubShift  = 0;
303 }
304 
305 // Setting the Distortion Parameter for Inter (ME)
setDistParam(TComPattern * pcPatternKey,Pel * piRefY,Int iRefStride,DistParam & rcDistParam)306 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, DistParam& rcDistParam )
307 {
308   // set Original & Curr Pointer / Stride
309   rcDistParam.pOrg = pcPatternKey->getROIY();
310   rcDistParam.pCur = piRefY;
311 
312   rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
313   rcDistParam.iStrideCur = iRefStride;
314 
315   // set Block Width / Height
316   rcDistParam.iCols    = pcPatternKey->getROIYWidth();
317   rcDistParam.iRows    = pcPatternKey->getROIYHeight();
318   rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
319 
320 #if AMP_SAD
321   if (rcDistParam.iCols == 12)
322   {
323     rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD12];
324   }
325   else if (rcDistParam.iCols == 24)
326   {
327     rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD24];
328   }
329   else if (rcDistParam.iCols == 48)
330   {
331     rcDistParam.DistFunc = m_afpDistortFunc[DF_SAD48];
332   }
333 #endif
334 
335   // initialize
336   rcDistParam.iSubShift  = 0;
337 }
338 
339 // Setting the Distortion Parameter for Inter (subpel ME with step)
setDistParam(TComPattern * pcPatternKey,Pel * piRefY,Int iRefStride,Int iStep,DistParam & rcDistParam,Bool bHADME)340 Void TComRdCost::setDistParam( TComPattern* pcPatternKey, Pel* piRefY, Int iRefStride, Int iStep, DistParam& rcDistParam, Bool bHADME )
341 {
342   // set Original & Curr Pointer / Stride
343   rcDistParam.pOrg = pcPatternKey->getROIY();
344   rcDistParam.pCur = piRefY;
345 
346   rcDistParam.iStrideOrg = pcPatternKey->getPatternLStride();
347   rcDistParam.iStrideCur = iRefStride * iStep;
348 
349   // set Step for interpolated buffer
350   rcDistParam.iStep = iStep;
351 
352   // set Block Width / Height
353   rcDistParam.iCols    = pcPatternKey->getROIYWidth();
354   rcDistParam.iRows    = pcPatternKey->getROIYHeight();
355 
356   // set distortion function
357   if ( !bHADME )
358   {
359     rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
360 #if AMP_SAD
361     if (rcDistParam.iCols == 12)
362     {
363       rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS12];
364     }
365     else if (rcDistParam.iCols == 24)
366     {
367       rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS24];
368     }
369     else if (rcDistParam.iCols == 48)
370     {
371       rcDistParam.DistFunc = m_afpDistortFunc[DF_SADS48];
372     }
373 #endif
374   }
375   else
376   {
377     rcDistParam.DistFunc = m_afpDistortFunc[DF_HADS + g_aucConvertToBit[ rcDistParam.iCols ] + 1 ];
378   }
379 
380   // initialize
381   rcDistParam.iSubShift  = 0;
382 }
383 
setDistParam(DistParam & rcDP,Int bitDepth,Pel * p1,Int iStride1,Pel * p2,Int iStride2,Int iWidth,Int iHeight,Bool bHadamard)384 Void TComRdCost::setDistParam( DistParam& rcDP, Int bitDepth, Pel* p1, Int iStride1, Pel* p2, Int iStride2, Int iWidth, Int iHeight, Bool bHadamard )
385 {
386   rcDP.pOrg       = p1;
387   rcDP.pCur       = p2;
388   rcDP.iStrideOrg = iStride1;
389   rcDP.iStrideCur = iStride2;
390   rcDP.iCols      = iWidth;
391   rcDP.iRows      = iHeight;
392   rcDP.iStep      = 1;
393   rcDP.iSubShift  = 0;
394   rcDP.bitDepth   = bitDepth;
395   rcDP.DistFunc   = m_afpDistortFunc[ ( bHadamard ? DF_HADS : DF_SADS ) + g_aucConvertToBit[ iWidth ] + 1 ];
396 }
397 
calcHAD(Int bitDepth,Pel * pi0,Int iStride0,Pel * pi1,Int iStride1,Int iWidth,Int iHeight)398 Distortion TComRdCost::calcHAD( Int bitDepth, Pel* pi0, Int iStride0, Pel* pi1, Int iStride1, Int iWidth, Int iHeight )
399 {
400   Distortion uiSum = 0;
401   Int x, y;
402 
403   if ( ( (iWidth % 8) == 0 ) && ( (iHeight % 8) == 0 ) )
404   {
405     for ( y=0; y<iHeight; y+= 8 )
406     {
407       for ( x=0; x<iWidth; x+= 8 )
408       {
409         uiSum += xCalcHADs8x8( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
410       }
411       pi0 += iStride0*8;
412       pi1 += iStride1*8;
413     }
414   }
415   else
416   {
417     assert ( ( (iWidth % 4) == 0 ) && ( (iHeight % 4) == 0 ) );
418 
419     for ( y=0; y<iHeight; y+= 4 )
420     {
421       for ( x=0; x<iWidth; x+= 4 )
422       {
423         uiSum += xCalcHADs4x4( &pi0[x], &pi1[x], iStride0, iStride1, 1 );
424       }
425       pi0 += iStride0*4;
426       pi1 += iStride1*4;
427     }
428   }
429 
430   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(bitDepth-8) );
431 }
432 
getDistPart(Int bitDepth,Pel * piCur,Int iCurStride,Pel * piOrg,Int iOrgStride,UInt uiBlkWidth,UInt uiBlkHeight,const ComponentID compID,DFunc eDFunc)433 Distortion TComRdCost::getDistPart( Int bitDepth, Pel* piCur, Int iCurStride,  Pel* piOrg, Int iOrgStride, UInt uiBlkWidth, UInt uiBlkHeight, const ComponentID compID, DFunc eDFunc )
434 {
435   DistParam cDtParam;
436   setDistParam( uiBlkWidth, uiBlkHeight, eDFunc, cDtParam );
437   cDtParam.pOrg       = piOrg;
438   cDtParam.pCur       = piCur;
439   cDtParam.iStrideOrg = iOrgStride;
440   cDtParam.iStrideCur = iCurStride;
441   cDtParam.iStep      = 1;
442 
443   cDtParam.bApplyWeight = false;
444   cDtParam.compIdx      = MAX_NUM_COMPONENT; // just for assert: to be sure it was set before use
445   cDtParam.bitDepth     = bitDepth;
446 
447   if (isChroma(compID))
448   {
449     return ((Distortion) (m_distortionWeight[compID] * cDtParam.DistFunc( &cDtParam )));
450   }
451   else
452   {
453     return cDtParam.DistFunc( &cDtParam );
454   }
455 }
456 
457 // ====================================================================================================================
458 // Distortion functions
459 // ====================================================================================================================
460 
461 // --------------------------------------------------------------------------------------------------------------------
462 // SAD
463 // --------------------------------------------------------------------------------------------------------------------
464 
xGetSAD(DistParam * pcDtParam)465 Distortion TComRdCost::xGetSAD( DistParam* pcDtParam )
466 {
467   if ( pcDtParam->bApplyWeight )
468   {
469     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
470   }
471   const Pel* piOrg   = pcDtParam->pOrg;
472   const Pel* piCur   = pcDtParam->pCur;
473   Int  iRows   = pcDtParam->iRows;
474   Int  iCols   = pcDtParam->iCols;
475   Int  iStrideCur = pcDtParam->iStrideCur;
476   Int  iStrideOrg = pcDtParam->iStrideOrg;
477 
478   Distortion uiSum = 0;
479 
480   for( ; iRows != 0; iRows-- )
481   {
482     for (Int n = 0; n < iCols; n++ )
483     {
484       uiSum += abs( piOrg[n] - piCur[n] );
485     }
486     piOrg += iStrideOrg;
487     piCur += iStrideCur;
488   }
489 
490   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
491 }
492 
xGetSAD4(DistParam * pcDtParam)493 Distortion TComRdCost::xGetSAD4( DistParam* pcDtParam )
494 {
495   if ( pcDtParam->bApplyWeight )
496   {
497     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
498   }
499   const Pel* piOrg   = pcDtParam->pOrg;
500   const Pel* piCur   = pcDtParam->pCur;
501   Int  iRows   = pcDtParam->iRows;
502   Int  iSubShift  = pcDtParam->iSubShift;
503   Int  iSubStep   = ( 1 << iSubShift );
504   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
505   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
506 
507   Distortion uiSum = 0;
508 
509   for( ; iRows != 0; iRows-=iSubStep )
510   {
511     uiSum += abs( piOrg[0] - piCur[0] );
512     uiSum += abs( piOrg[1] - piCur[1] );
513     uiSum += abs( piOrg[2] - piCur[2] );
514     uiSum += abs( piOrg[3] - piCur[3] );
515 
516     piOrg += iStrideOrg;
517     piCur += iStrideCur;
518   }
519 
520   uiSum <<= iSubShift;
521   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
522 }
523 
xGetSAD8(DistParam * pcDtParam)524 Distortion TComRdCost::xGetSAD8( DistParam* pcDtParam )
525 {
526   if ( pcDtParam->bApplyWeight )
527   {
528     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
529   }
530   const Pel* piOrg      = pcDtParam->pOrg;
531   const Pel* piCur      = pcDtParam->pCur;
532   Int  iRows      = pcDtParam->iRows;
533   Int  iSubShift  = pcDtParam->iSubShift;
534   Int  iSubStep   = ( 1 << iSubShift );
535   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
536   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
537 
538   Distortion uiSum = 0;
539 
540   for( ; iRows != 0; iRows-=iSubStep )
541   {
542     uiSum += abs( piOrg[0] - piCur[0] );
543     uiSum += abs( piOrg[1] - piCur[1] );
544     uiSum += abs( piOrg[2] - piCur[2] );
545     uiSum += abs( piOrg[3] - piCur[3] );
546     uiSum += abs( piOrg[4] - piCur[4] );
547     uiSum += abs( piOrg[5] - piCur[5] );
548     uiSum += abs( piOrg[6] - piCur[6] );
549     uiSum += abs( piOrg[7] - piCur[7] );
550 
551     piOrg += iStrideOrg;
552     piCur += iStrideCur;
553   }
554 
555   uiSum <<= iSubShift;
556   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
557 }
558 
xGetSAD16(DistParam * pcDtParam)559 Distortion TComRdCost::xGetSAD16( DistParam* pcDtParam )
560 {
561   if ( pcDtParam->bApplyWeight )
562   {
563     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
564   }
565   const Pel* piOrg   = pcDtParam->pOrg;
566   const Pel* piCur   = pcDtParam->pCur;
567   Int  iRows   = pcDtParam->iRows;
568   Int  iSubShift  = pcDtParam->iSubShift;
569   Int  iSubStep   = ( 1 << iSubShift );
570   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
571   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
572 
573   Distortion uiSum = 0;
574 
575   for( ; iRows != 0; iRows-=iSubStep )
576   {
577     uiSum += abs( piOrg[0] - piCur[0] );
578     uiSum += abs( piOrg[1] - piCur[1] );
579     uiSum += abs( piOrg[2] - piCur[2] );
580     uiSum += abs( piOrg[3] - piCur[3] );
581     uiSum += abs( piOrg[4] - piCur[4] );
582     uiSum += abs( piOrg[5] - piCur[5] );
583     uiSum += abs( piOrg[6] - piCur[6] );
584     uiSum += abs( piOrg[7] - piCur[7] );
585     uiSum += abs( piOrg[8] - piCur[8] );
586     uiSum += abs( piOrg[9] - piCur[9] );
587     uiSum += abs( piOrg[10] - piCur[10] );
588     uiSum += abs( piOrg[11] - piCur[11] );
589     uiSum += abs( piOrg[12] - piCur[12] );
590     uiSum += abs( piOrg[13] - piCur[13] );
591     uiSum += abs( piOrg[14] - piCur[14] );
592     uiSum += abs( piOrg[15] - piCur[15] );
593 
594     piOrg += iStrideOrg;
595     piCur += iStrideCur;
596   }
597 
598   uiSum <<= iSubShift;
599   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
600 }
601 
602 #if AMP_SAD
xGetSAD12(DistParam * pcDtParam)603 Distortion TComRdCost::xGetSAD12( DistParam* pcDtParam )
604 {
605   if ( pcDtParam->bApplyWeight )
606   {
607     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
608   }
609   const Pel* piOrg   = pcDtParam->pOrg;
610   const Pel* piCur   = pcDtParam->pCur;
611   Int  iRows   = pcDtParam->iRows;
612   Int  iSubShift  = pcDtParam->iSubShift;
613   Int  iSubStep   = ( 1 << iSubShift );
614   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
615   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
616 
617   Distortion uiSum = 0;
618 
619   for( ; iRows != 0; iRows-=iSubStep )
620   {
621     uiSum += abs( piOrg[0] - piCur[0] );
622     uiSum += abs( piOrg[1] - piCur[1] );
623     uiSum += abs( piOrg[2] - piCur[2] );
624     uiSum += abs( piOrg[3] - piCur[3] );
625     uiSum += abs( piOrg[4] - piCur[4] );
626     uiSum += abs( piOrg[5] - piCur[5] );
627     uiSum += abs( piOrg[6] - piCur[6] );
628     uiSum += abs( piOrg[7] - piCur[7] );
629     uiSum += abs( piOrg[8] - piCur[8] );
630     uiSum += abs( piOrg[9] - piCur[9] );
631     uiSum += abs( piOrg[10] - piCur[10] );
632     uiSum += abs( piOrg[11] - piCur[11] );
633 
634     piOrg += iStrideOrg;
635     piCur += iStrideCur;
636   }
637 
638   uiSum <<= iSubShift;
639   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
640 }
641 #endif
642 
xGetSAD16N(DistParam * pcDtParam)643 Distortion TComRdCost::xGetSAD16N( DistParam* pcDtParam )
644 {
645   const Pel* piOrg   = pcDtParam->pOrg;
646   const Pel* piCur   = pcDtParam->pCur;
647   Int  iRows   = pcDtParam->iRows;
648   Int  iCols   = pcDtParam->iCols;
649   Int  iSubShift  = pcDtParam->iSubShift;
650   Int  iSubStep   = ( 1 << iSubShift );
651   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
652   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
653 
654   Distortion uiSum = 0;
655 
656   for( ; iRows != 0; iRows-=iSubStep )
657   {
658     for (Int n = 0; n < iCols; n+=16 )
659     {
660       uiSum += abs( piOrg[n+ 0] - piCur[n+ 0] );
661       uiSum += abs( piOrg[n+ 1] - piCur[n+ 1] );
662       uiSum += abs( piOrg[n+ 2] - piCur[n+ 2] );
663       uiSum += abs( piOrg[n+ 3] - piCur[n+ 3] );
664       uiSum += abs( piOrg[n+ 4] - piCur[n+ 4] );
665       uiSum += abs( piOrg[n+ 5] - piCur[n+ 5] );
666       uiSum += abs( piOrg[n+ 6] - piCur[n+ 6] );
667       uiSum += abs( piOrg[n+ 7] - piCur[n+ 7] );
668       uiSum += abs( piOrg[n+ 8] - piCur[n+ 8] );
669       uiSum += abs( piOrg[n+ 9] - piCur[n+ 9] );
670       uiSum += abs( piOrg[n+10] - piCur[n+10] );
671       uiSum += abs( piOrg[n+11] - piCur[n+11] );
672       uiSum += abs( piOrg[n+12] - piCur[n+12] );
673       uiSum += abs( piOrg[n+13] - piCur[n+13] );
674       uiSum += abs( piOrg[n+14] - piCur[n+14] );
675       uiSum += abs( piOrg[n+15] - piCur[n+15] );
676     }
677     piOrg += iStrideOrg;
678     piCur += iStrideCur;
679   }
680 
681   uiSum <<= iSubShift;
682   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
683 }
684 
xGetSAD32(DistParam * pcDtParam)685 Distortion TComRdCost::xGetSAD32( DistParam* pcDtParam )
686 {
687   if ( pcDtParam->bApplyWeight )
688   {
689     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
690   }
691   const Pel* piOrg   = pcDtParam->pOrg;
692   const Pel* piCur   = pcDtParam->pCur;
693   Int  iRows   = pcDtParam->iRows;
694   Int  iSubShift  = pcDtParam->iSubShift;
695   Int  iSubStep   = ( 1 << iSubShift );
696   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
697   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
698 
699   Distortion uiSum = 0;
700 
701   for( ; iRows != 0; iRows-=iSubStep )
702   {
703     uiSum += abs( piOrg[0] - piCur[0] );
704     uiSum += abs( piOrg[1] - piCur[1] );
705     uiSum += abs( piOrg[2] - piCur[2] );
706     uiSum += abs( piOrg[3] - piCur[3] );
707     uiSum += abs( piOrg[4] - piCur[4] );
708     uiSum += abs( piOrg[5] - piCur[5] );
709     uiSum += abs( piOrg[6] - piCur[6] );
710     uiSum += abs( piOrg[7] - piCur[7] );
711     uiSum += abs( piOrg[8] - piCur[8] );
712     uiSum += abs( piOrg[9] - piCur[9] );
713     uiSum += abs( piOrg[10] - piCur[10] );
714     uiSum += abs( piOrg[11] - piCur[11] );
715     uiSum += abs( piOrg[12] - piCur[12] );
716     uiSum += abs( piOrg[13] - piCur[13] );
717     uiSum += abs( piOrg[14] - piCur[14] );
718     uiSum += abs( piOrg[15] - piCur[15] );
719     uiSum += abs( piOrg[16] - piCur[16] );
720     uiSum += abs( piOrg[17] - piCur[17] );
721     uiSum += abs( piOrg[18] - piCur[18] );
722     uiSum += abs( piOrg[19] - piCur[19] );
723     uiSum += abs( piOrg[20] - piCur[20] );
724     uiSum += abs( piOrg[21] - piCur[21] );
725     uiSum += abs( piOrg[22] - piCur[22] );
726     uiSum += abs( piOrg[23] - piCur[23] );
727     uiSum += abs( piOrg[24] - piCur[24] );
728     uiSum += abs( piOrg[25] - piCur[25] );
729     uiSum += abs( piOrg[26] - piCur[26] );
730     uiSum += abs( piOrg[27] - piCur[27] );
731     uiSum += abs( piOrg[28] - piCur[28] );
732     uiSum += abs( piOrg[29] - piCur[29] );
733     uiSum += abs( piOrg[30] - piCur[30] );
734     uiSum += abs( piOrg[31] - piCur[31] );
735 
736     piOrg += iStrideOrg;
737     piCur += iStrideCur;
738   }
739 
740   uiSum <<= iSubShift;
741   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
742 }
743 
744 #if AMP_SAD
xGetSAD24(DistParam * pcDtParam)745 Distortion TComRdCost::xGetSAD24( DistParam* pcDtParam )
746 {
747   if ( pcDtParam->bApplyWeight )
748   {
749     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
750   }
751   const Pel* piOrg   = pcDtParam->pOrg;
752   const Pel* piCur   = pcDtParam->pCur;
753   Int  iRows   = pcDtParam->iRows;
754   Int  iSubShift  = pcDtParam->iSubShift;
755   Int  iSubStep   = ( 1 << iSubShift );
756   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
757   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
758 
759   Distortion uiSum = 0;
760 
761   for( ; iRows != 0; iRows-=iSubStep )
762   {
763     uiSum += abs( piOrg[0] - piCur[0] );
764     uiSum += abs( piOrg[1] - piCur[1] );
765     uiSum += abs( piOrg[2] - piCur[2] );
766     uiSum += abs( piOrg[3] - piCur[3] );
767     uiSum += abs( piOrg[4] - piCur[4] );
768     uiSum += abs( piOrg[5] - piCur[5] );
769     uiSum += abs( piOrg[6] - piCur[6] );
770     uiSum += abs( piOrg[7] - piCur[7] );
771     uiSum += abs( piOrg[8] - piCur[8] );
772     uiSum += abs( piOrg[9] - piCur[9] );
773     uiSum += abs( piOrg[10] - piCur[10] );
774     uiSum += abs( piOrg[11] - piCur[11] );
775     uiSum += abs( piOrg[12] - piCur[12] );
776     uiSum += abs( piOrg[13] - piCur[13] );
777     uiSum += abs( piOrg[14] - piCur[14] );
778     uiSum += abs( piOrg[15] - piCur[15] );
779     uiSum += abs( piOrg[16] - piCur[16] );
780     uiSum += abs( piOrg[17] - piCur[17] );
781     uiSum += abs( piOrg[18] - piCur[18] );
782     uiSum += abs( piOrg[19] - piCur[19] );
783     uiSum += abs( piOrg[20] - piCur[20] );
784     uiSum += abs( piOrg[21] - piCur[21] );
785     uiSum += abs( piOrg[22] - piCur[22] );
786     uiSum += abs( piOrg[23] - piCur[23] );
787 
788     piOrg += iStrideOrg;
789     piCur += iStrideCur;
790   }
791 
792   uiSum <<= iSubShift;
793   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
794 }
795 
796 #endif
797 
xGetSAD64(DistParam * pcDtParam)798 Distortion TComRdCost::xGetSAD64( DistParam* pcDtParam )
799 {
800   if ( pcDtParam->bApplyWeight )
801   {
802     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
803   }
804   const Pel* piOrg   = pcDtParam->pOrg;
805   const Pel* piCur   = pcDtParam->pCur;
806   Int  iRows   = pcDtParam->iRows;
807   Int  iSubShift  = pcDtParam->iSubShift;
808   Int  iSubStep   = ( 1 << iSubShift );
809   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
810   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
811 
812   Distortion uiSum = 0;
813 
814   for( ; iRows != 0; iRows-=iSubStep )
815   {
816     uiSum += abs( piOrg[0] - piCur[0] );
817     uiSum += abs( piOrg[1] - piCur[1] );
818     uiSum += abs( piOrg[2] - piCur[2] );
819     uiSum += abs( piOrg[3] - piCur[3] );
820     uiSum += abs( piOrg[4] - piCur[4] );
821     uiSum += abs( piOrg[5] - piCur[5] );
822     uiSum += abs( piOrg[6] - piCur[6] );
823     uiSum += abs( piOrg[7] - piCur[7] );
824     uiSum += abs( piOrg[8] - piCur[8] );
825     uiSum += abs( piOrg[9] - piCur[9] );
826     uiSum += abs( piOrg[10] - piCur[10] );
827     uiSum += abs( piOrg[11] - piCur[11] );
828     uiSum += abs( piOrg[12] - piCur[12] );
829     uiSum += abs( piOrg[13] - piCur[13] );
830     uiSum += abs( piOrg[14] - piCur[14] );
831     uiSum += abs( piOrg[15] - piCur[15] );
832     uiSum += abs( piOrg[16] - piCur[16] );
833     uiSum += abs( piOrg[17] - piCur[17] );
834     uiSum += abs( piOrg[18] - piCur[18] );
835     uiSum += abs( piOrg[19] - piCur[19] );
836     uiSum += abs( piOrg[20] - piCur[20] );
837     uiSum += abs( piOrg[21] - piCur[21] );
838     uiSum += abs( piOrg[22] - piCur[22] );
839     uiSum += abs( piOrg[23] - piCur[23] );
840     uiSum += abs( piOrg[24] - piCur[24] );
841     uiSum += abs( piOrg[25] - piCur[25] );
842     uiSum += abs( piOrg[26] - piCur[26] );
843     uiSum += abs( piOrg[27] - piCur[27] );
844     uiSum += abs( piOrg[28] - piCur[28] );
845     uiSum += abs( piOrg[29] - piCur[29] );
846     uiSum += abs( piOrg[30] - piCur[30] );
847     uiSum += abs( piOrg[31] - piCur[31] );
848     uiSum += abs( piOrg[32] - piCur[32] );
849     uiSum += abs( piOrg[33] - piCur[33] );
850     uiSum += abs( piOrg[34] - piCur[34] );
851     uiSum += abs( piOrg[35] - piCur[35] );
852     uiSum += abs( piOrg[36] - piCur[36] );
853     uiSum += abs( piOrg[37] - piCur[37] );
854     uiSum += abs( piOrg[38] - piCur[38] );
855     uiSum += abs( piOrg[39] - piCur[39] );
856     uiSum += abs( piOrg[40] - piCur[40] );
857     uiSum += abs( piOrg[41] - piCur[41] );
858     uiSum += abs( piOrg[42] - piCur[42] );
859     uiSum += abs( piOrg[43] - piCur[43] );
860     uiSum += abs( piOrg[44] - piCur[44] );
861     uiSum += abs( piOrg[45] - piCur[45] );
862     uiSum += abs( piOrg[46] - piCur[46] );
863     uiSum += abs( piOrg[47] - piCur[47] );
864     uiSum += abs( piOrg[48] - piCur[48] );
865     uiSum += abs( piOrg[49] - piCur[49] );
866     uiSum += abs( piOrg[50] - piCur[50] );
867     uiSum += abs( piOrg[51] - piCur[51] );
868     uiSum += abs( piOrg[52] - piCur[52] );
869     uiSum += abs( piOrg[53] - piCur[53] );
870     uiSum += abs( piOrg[54] - piCur[54] );
871     uiSum += abs( piOrg[55] - piCur[55] );
872     uiSum += abs( piOrg[56] - piCur[56] );
873     uiSum += abs( piOrg[57] - piCur[57] );
874     uiSum += abs( piOrg[58] - piCur[58] );
875     uiSum += abs( piOrg[59] - piCur[59] );
876     uiSum += abs( piOrg[60] - piCur[60] );
877     uiSum += abs( piOrg[61] - piCur[61] );
878     uiSum += abs( piOrg[62] - piCur[62] );
879     uiSum += abs( piOrg[63] - piCur[63] );
880 
881     piOrg += iStrideOrg;
882     piCur += iStrideCur;
883   }
884 
885   uiSum <<= iSubShift;
886   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
887 }
888 
889 #if AMP_SAD
xGetSAD48(DistParam * pcDtParam)890 Distortion TComRdCost::xGetSAD48( DistParam* pcDtParam )
891 {
892   if ( pcDtParam->bApplyWeight )
893   {
894     return TComRdCostWeightPrediction::xGetSADw( pcDtParam );
895   }
896   const Pel* piOrg   = pcDtParam->pOrg;
897   const Pel* piCur   = pcDtParam->pCur;
898   Int  iRows   = pcDtParam->iRows;
899   Int  iSubShift  = pcDtParam->iSubShift;
900   Int  iSubStep   = ( 1 << iSubShift );
901   Int  iStrideCur = pcDtParam->iStrideCur*iSubStep;
902   Int  iStrideOrg = pcDtParam->iStrideOrg*iSubStep;
903 
904   Distortion uiSum = 0;
905 
906   for( ; iRows != 0; iRows-=iSubStep )
907   {
908     uiSum += abs( piOrg[0] - piCur[0] );
909     uiSum += abs( piOrg[1] - piCur[1] );
910     uiSum += abs( piOrg[2] - piCur[2] );
911     uiSum += abs( piOrg[3] - piCur[3] );
912     uiSum += abs( piOrg[4] - piCur[4] );
913     uiSum += abs( piOrg[5] - piCur[5] );
914     uiSum += abs( piOrg[6] - piCur[6] );
915     uiSum += abs( piOrg[7] - piCur[7] );
916     uiSum += abs( piOrg[8] - piCur[8] );
917     uiSum += abs( piOrg[9] - piCur[9] );
918     uiSum += abs( piOrg[10] - piCur[10] );
919     uiSum += abs( piOrg[11] - piCur[11] );
920     uiSum += abs( piOrg[12] - piCur[12] );
921     uiSum += abs( piOrg[13] - piCur[13] );
922     uiSum += abs( piOrg[14] - piCur[14] );
923     uiSum += abs( piOrg[15] - piCur[15] );
924     uiSum += abs( piOrg[16] - piCur[16] );
925     uiSum += abs( piOrg[17] - piCur[17] );
926     uiSum += abs( piOrg[18] - piCur[18] );
927     uiSum += abs( piOrg[19] - piCur[19] );
928     uiSum += abs( piOrg[20] - piCur[20] );
929     uiSum += abs( piOrg[21] - piCur[21] );
930     uiSum += abs( piOrg[22] - piCur[22] );
931     uiSum += abs( piOrg[23] - piCur[23] );
932     uiSum += abs( piOrg[24] - piCur[24] );
933     uiSum += abs( piOrg[25] - piCur[25] );
934     uiSum += abs( piOrg[26] - piCur[26] );
935     uiSum += abs( piOrg[27] - piCur[27] );
936     uiSum += abs( piOrg[28] - piCur[28] );
937     uiSum += abs( piOrg[29] - piCur[29] );
938     uiSum += abs( piOrg[30] - piCur[30] );
939     uiSum += abs( piOrg[31] - piCur[31] );
940     uiSum += abs( piOrg[32] - piCur[32] );
941     uiSum += abs( piOrg[33] - piCur[33] );
942     uiSum += abs( piOrg[34] - piCur[34] );
943     uiSum += abs( piOrg[35] - piCur[35] );
944     uiSum += abs( piOrg[36] - piCur[36] );
945     uiSum += abs( piOrg[37] - piCur[37] );
946     uiSum += abs( piOrg[38] - piCur[38] );
947     uiSum += abs( piOrg[39] - piCur[39] );
948     uiSum += abs( piOrg[40] - piCur[40] );
949     uiSum += abs( piOrg[41] - piCur[41] );
950     uiSum += abs( piOrg[42] - piCur[42] );
951     uiSum += abs( piOrg[43] - piCur[43] );
952     uiSum += abs( piOrg[44] - piCur[44] );
953     uiSum += abs( piOrg[45] - piCur[45] );
954     uiSum += abs( piOrg[46] - piCur[46] );
955     uiSum += abs( piOrg[47] - piCur[47] );
956 
957     piOrg += iStrideOrg;
958     piCur += iStrideCur;
959   }
960 
961   uiSum <<= iSubShift;
962   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
963 }
964 #endif
965 
966 // --------------------------------------------------------------------------------------------------------------------
967 // SSE
968 // --------------------------------------------------------------------------------------------------------------------
969 
xGetSSE(DistParam * pcDtParam)970 Distortion TComRdCost::xGetSSE( DistParam* pcDtParam )
971 {
972   if ( pcDtParam->bApplyWeight )
973   {
974     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
975   }
976   const Pel* piOrg   = pcDtParam->pOrg;
977   const Pel* piCur   = pcDtParam->pCur;
978   Int  iRows   = pcDtParam->iRows;
979   Int  iCols   = pcDtParam->iCols;
980   Int  iStrideOrg = pcDtParam->iStrideOrg;
981   Int  iStrideCur = pcDtParam->iStrideCur;
982 
983   Distortion uiSum   = 0;
984   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
985 
986   Intermediate_Int iTemp;
987 
988   for( ; iRows != 0; iRows-- )
989   {
990     for (Int n = 0; n < iCols; n++ )
991     {
992       iTemp = piOrg[n  ] - piCur[n  ];
993       uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
994     }
995     piOrg += iStrideOrg;
996     piCur += iStrideCur;
997   }
998 
999   return ( uiSum );
1000 }
1001 
xGetSSE4(DistParam * pcDtParam)1002 Distortion TComRdCost::xGetSSE4( DistParam* pcDtParam )
1003 {
1004   if ( pcDtParam->bApplyWeight )
1005   {
1006     assert( pcDtParam->iCols == 4 );
1007     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1008   }
1009   const Pel* piOrg   = pcDtParam->pOrg;
1010   const Pel* piCur   = pcDtParam->pCur;
1011   Int  iRows   = pcDtParam->iRows;
1012   Int  iStrideOrg = pcDtParam->iStrideOrg;
1013   Int  iStrideCur = pcDtParam->iStrideCur;
1014 
1015   Distortion uiSum   = 0;
1016   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1017 
1018   Intermediate_Int  iTemp;
1019 
1020   for( ; iRows != 0; iRows-- )
1021   {
1022 
1023     iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1024     iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1025     iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1026     iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1027 
1028     piOrg += iStrideOrg;
1029     piCur += iStrideCur;
1030   }
1031 
1032   return ( uiSum );
1033 }
1034 
xGetSSE8(DistParam * pcDtParam)1035 Distortion TComRdCost::xGetSSE8( DistParam* pcDtParam )
1036 {
1037   if ( pcDtParam->bApplyWeight )
1038   {
1039     assert( pcDtParam->iCols == 8 );
1040     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1041   }
1042   const Pel* piOrg   = pcDtParam->pOrg;
1043   const Pel* piCur   = pcDtParam->pCur;
1044   Int  iRows   = pcDtParam->iRows;
1045   Int  iStrideOrg = pcDtParam->iStrideOrg;
1046   Int  iStrideCur = pcDtParam->iStrideCur;
1047 
1048   Distortion uiSum   = 0;
1049   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1050 
1051   Intermediate_Int  iTemp;
1052 
1053   for( ; iRows != 0; iRows-- )
1054   {
1055     iTemp = piOrg[0] - piCur[0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1056     iTemp = piOrg[1] - piCur[1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1057     iTemp = piOrg[2] - piCur[2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1058     iTemp = piOrg[3] - piCur[3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1059     iTemp = piOrg[4] - piCur[4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1060     iTemp = piOrg[5] - piCur[5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1061     iTemp = piOrg[6] - piCur[6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1062     iTemp = piOrg[7] - piCur[7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1063 
1064     piOrg += iStrideOrg;
1065     piCur += iStrideCur;
1066   }
1067 
1068   return ( uiSum );
1069 }
1070 
xGetSSE16(DistParam * pcDtParam)1071 Distortion TComRdCost::xGetSSE16( DistParam* pcDtParam )
1072 {
1073   if ( pcDtParam->bApplyWeight )
1074   {
1075     assert( pcDtParam->iCols == 16 );
1076     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1077   }
1078   const Pel* piOrg   = pcDtParam->pOrg;
1079   const Pel* piCur   = pcDtParam->pCur;
1080   Int  iRows   = pcDtParam->iRows;
1081   Int  iStrideOrg = pcDtParam->iStrideOrg;
1082   Int  iStrideCur = pcDtParam->iStrideCur;
1083 
1084   Distortion uiSum   = 0;
1085   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1086 
1087   Intermediate_Int  iTemp;
1088 
1089   for( ; iRows != 0; iRows-- )
1090   {
1091 
1092     iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1093     iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1094     iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1095     iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1096     iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1097     iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1098     iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1099     iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1100     iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1101     iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1102     iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1103     iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1104     iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1105     iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1106     iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1107     iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1108 
1109     piOrg += iStrideOrg;
1110     piCur += iStrideCur;
1111   }
1112 
1113   return ( uiSum );
1114 }
1115 
xGetSSE16N(DistParam * pcDtParam)1116 Distortion TComRdCost::xGetSSE16N( DistParam* pcDtParam )
1117 {
1118   if ( pcDtParam->bApplyWeight )
1119   {
1120     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1121   }
1122   const Pel* piOrg   = pcDtParam->pOrg;
1123   const Pel* piCur   = pcDtParam->pCur;
1124   Int  iRows   = pcDtParam->iRows;
1125   Int  iCols   = pcDtParam->iCols;
1126   Int  iStrideOrg = pcDtParam->iStrideOrg;
1127   Int  iStrideCur = pcDtParam->iStrideCur;
1128 
1129   Distortion uiSum   = 0;
1130   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1131 
1132   Intermediate_Int  iTemp;
1133 
1134   for( ; iRows != 0; iRows-- )
1135   {
1136     for (Int n = 0; n < iCols; n+=16 )
1137     {
1138 
1139       iTemp = piOrg[n+ 0] - piCur[n+ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1140       iTemp = piOrg[n+ 1] - piCur[n+ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1141       iTemp = piOrg[n+ 2] - piCur[n+ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1142       iTemp = piOrg[n+ 3] - piCur[n+ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1143       iTemp = piOrg[n+ 4] - piCur[n+ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1144       iTemp = piOrg[n+ 5] - piCur[n+ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1145       iTemp = piOrg[n+ 6] - piCur[n+ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1146       iTemp = piOrg[n+ 7] - piCur[n+ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1147       iTemp = piOrg[n+ 8] - piCur[n+ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1148       iTemp = piOrg[n+ 9] - piCur[n+ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1149       iTemp = piOrg[n+10] - piCur[n+10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1150       iTemp = piOrg[n+11] - piCur[n+11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1151       iTemp = piOrg[n+12] - piCur[n+12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1152       iTemp = piOrg[n+13] - piCur[n+13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1153       iTemp = piOrg[n+14] - piCur[n+14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1154       iTemp = piOrg[n+15] - piCur[n+15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1155 
1156     }
1157     piOrg += iStrideOrg;
1158     piCur += iStrideCur;
1159   }
1160 
1161   return ( uiSum );
1162 }
1163 
xGetSSE32(DistParam * pcDtParam)1164 Distortion TComRdCost::xGetSSE32( DistParam* pcDtParam )
1165 {
1166   if ( pcDtParam->bApplyWeight )
1167   {
1168     assert( pcDtParam->iCols == 32 );
1169     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1170   }
1171   const Pel* piOrg   = pcDtParam->pOrg;
1172   const Pel* piCur   = pcDtParam->pCur;
1173   Int  iRows   = pcDtParam->iRows;
1174   Int  iStrideOrg = pcDtParam->iStrideOrg;
1175   Int  iStrideCur = pcDtParam->iStrideCur;
1176 
1177   Distortion uiSum   = 0;
1178   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1179 
1180   Intermediate_Int  iTemp;
1181 
1182   for( ; iRows != 0; iRows-- )
1183   {
1184 
1185     iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1186     iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1187     iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1188     iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1189     iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1190     iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1191     iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1192     iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1193     iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1194     iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1195     iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1196     iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1197     iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1198     iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1199     iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1200     iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1201     iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1202     iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1203     iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1204     iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1205     iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1206     iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1207     iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1208     iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1209     iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1210     iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1211     iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1212     iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1213     iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1214     iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1215     iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1216     iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1217 
1218     piOrg += iStrideOrg;
1219     piCur += iStrideCur;
1220   }
1221 
1222   return ( uiSum );
1223 }
1224 
xGetSSE64(DistParam * pcDtParam)1225 Distortion TComRdCost::xGetSSE64( DistParam* pcDtParam )
1226 {
1227   if ( pcDtParam->bApplyWeight )
1228   {
1229     assert( pcDtParam->iCols == 64 );
1230     return TComRdCostWeightPrediction::xGetSSEw( pcDtParam );
1231   }
1232   const Pel* piOrg   = pcDtParam->pOrg;
1233   const Pel* piCur   = pcDtParam->pCur;
1234   Int  iRows   = pcDtParam->iRows;
1235   Int  iStrideOrg = pcDtParam->iStrideOrg;
1236   Int  iStrideCur = pcDtParam->iStrideCur;
1237 
1238   Distortion uiSum   = 0;
1239   UInt       uiShift = DISTORTION_PRECISION_ADJUSTMENT((pcDtParam->bitDepth-8) << 1);
1240 
1241   Intermediate_Int  iTemp;
1242 
1243   for( ; iRows != 0; iRows-- )
1244   {
1245     iTemp = piOrg[ 0] - piCur[ 0]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1246     iTemp = piOrg[ 1] - piCur[ 1]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1247     iTemp = piOrg[ 2] - piCur[ 2]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1248     iTemp = piOrg[ 3] - piCur[ 3]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1249     iTemp = piOrg[ 4] - piCur[ 4]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1250     iTemp = piOrg[ 5] - piCur[ 5]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1251     iTemp = piOrg[ 6] - piCur[ 6]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1252     iTemp = piOrg[ 7] - piCur[ 7]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1253     iTemp = piOrg[ 8] - piCur[ 8]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1254     iTemp = piOrg[ 9] - piCur[ 9]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1255     iTemp = piOrg[10] - piCur[10]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1256     iTemp = piOrg[11] - piCur[11]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1257     iTemp = piOrg[12] - piCur[12]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1258     iTemp = piOrg[13] - piCur[13]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1259     iTemp = piOrg[14] - piCur[14]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1260     iTemp = piOrg[15] - piCur[15]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1261     iTemp = piOrg[16] - piCur[16]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1262     iTemp = piOrg[17] - piCur[17]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1263     iTemp = piOrg[18] - piCur[18]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1264     iTemp = piOrg[19] - piCur[19]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1265     iTemp = piOrg[20] - piCur[20]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1266     iTemp = piOrg[21] - piCur[21]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1267     iTemp = piOrg[22] - piCur[22]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1268     iTemp = piOrg[23] - piCur[23]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1269     iTemp = piOrg[24] - piCur[24]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1270     iTemp = piOrg[25] - piCur[25]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1271     iTemp = piOrg[26] - piCur[26]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1272     iTemp = piOrg[27] - piCur[27]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1273     iTemp = piOrg[28] - piCur[28]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1274     iTemp = piOrg[29] - piCur[29]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1275     iTemp = piOrg[30] - piCur[30]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1276     iTemp = piOrg[31] - piCur[31]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1277     iTemp = piOrg[32] - piCur[32]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1278     iTemp = piOrg[33] - piCur[33]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1279     iTemp = piOrg[34] - piCur[34]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1280     iTemp = piOrg[35] - piCur[35]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1281     iTemp = piOrg[36] - piCur[36]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1282     iTemp = piOrg[37] - piCur[37]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1283     iTemp = piOrg[38] - piCur[38]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1284     iTemp = piOrg[39] - piCur[39]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1285     iTemp = piOrg[40] - piCur[40]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1286     iTemp = piOrg[41] - piCur[41]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1287     iTemp = piOrg[42] - piCur[42]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1288     iTemp = piOrg[43] - piCur[43]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1289     iTemp = piOrg[44] - piCur[44]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1290     iTemp = piOrg[45] - piCur[45]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1291     iTemp = piOrg[46] - piCur[46]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1292     iTemp = piOrg[47] - piCur[47]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1293     iTemp = piOrg[48] - piCur[48]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1294     iTemp = piOrg[49] - piCur[49]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1295     iTemp = piOrg[50] - piCur[50]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1296     iTemp = piOrg[51] - piCur[51]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1297     iTemp = piOrg[52] - piCur[52]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1298     iTemp = piOrg[53] - piCur[53]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1299     iTemp = piOrg[54] - piCur[54]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1300     iTemp = piOrg[55] - piCur[55]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1301     iTemp = piOrg[56] - piCur[56]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1302     iTemp = piOrg[57] - piCur[57]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1303     iTemp = piOrg[58] - piCur[58]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1304     iTemp = piOrg[59] - piCur[59]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1305     iTemp = piOrg[60] - piCur[60]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1306     iTemp = piOrg[61] - piCur[61]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1307     iTemp = piOrg[62] - piCur[62]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1308     iTemp = piOrg[63] - piCur[63]; uiSum += Distortion(( iTemp * iTemp ) >> uiShift);
1309 
1310     piOrg += iStrideOrg;
1311     piCur += iStrideCur;
1312   }
1313 
1314   return ( uiSum );
1315 }
1316 
1317 // --------------------------------------------------------------------------------------------------------------------
1318 // HADAMARD with step (used in fractional search)
1319 // --------------------------------------------------------------------------------------------------------------------
1320 
xCalcHADs2x2(Pel * piOrg,Pel * piCur,Int iStrideOrg,Int iStrideCur,Int iStep)1321 Distortion TComRdCost::xCalcHADs2x2( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1322 {
1323   Distortion satd = 0;
1324   TCoeff diff[4], m[4];
1325   assert( iStep == 1 );
1326   diff[0] = piOrg[0             ] - piCur[0];
1327   diff[1] = piOrg[1             ] - piCur[1];
1328   diff[2] = piOrg[iStrideOrg    ] - piCur[0 + iStrideCur];
1329   diff[3] = piOrg[iStrideOrg + 1] - piCur[1 + iStrideCur];
1330   m[0] = diff[0] + diff[2];
1331   m[1] = diff[1] + diff[3];
1332   m[2] = diff[0] - diff[2];
1333   m[3] = diff[1] - diff[3];
1334 
1335   satd += abs(m[0] + m[1]);
1336   satd += abs(m[0] - m[1]);
1337   satd += abs(m[2] + m[3]);
1338   satd += abs(m[2] - m[3]);
1339 
1340   return satd;
1341 }
1342 
xCalcHADs4x4(Pel * piOrg,Pel * piCur,Int iStrideOrg,Int iStrideCur,Int iStep)1343 Distortion TComRdCost::xCalcHADs4x4( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1344 {
1345   Int k;
1346   Distortion satd = 0;
1347   TCoeff diff[16], m[16], d[16];
1348 
1349   assert( iStep == 1 );
1350   for( k = 0; k < 16; k+=4 )
1351   {
1352     diff[k+0] = piOrg[0] - piCur[0];
1353     diff[k+1] = piOrg[1] - piCur[1];
1354     diff[k+2] = piOrg[2] - piCur[2];
1355     diff[k+3] = piOrg[3] - piCur[3];
1356 
1357     piCur += iStrideCur;
1358     piOrg += iStrideOrg;
1359   }
1360 
1361   /*===== hadamard transform =====*/
1362   m[ 0] = diff[ 0] + diff[12];
1363   m[ 1] = diff[ 1] + diff[13];
1364   m[ 2] = diff[ 2] + diff[14];
1365   m[ 3] = diff[ 3] + diff[15];
1366   m[ 4] = diff[ 4] + diff[ 8];
1367   m[ 5] = diff[ 5] + diff[ 9];
1368   m[ 6] = diff[ 6] + diff[10];
1369   m[ 7] = diff[ 7] + diff[11];
1370   m[ 8] = diff[ 4] - diff[ 8];
1371   m[ 9] = diff[ 5] - diff[ 9];
1372   m[10] = diff[ 6] - diff[10];
1373   m[11] = diff[ 7] - diff[11];
1374   m[12] = diff[ 0] - diff[12];
1375   m[13] = diff[ 1] - diff[13];
1376   m[14] = diff[ 2] - diff[14];
1377   m[15] = diff[ 3] - diff[15];
1378 
1379   d[ 0] = m[ 0] + m[ 4];
1380   d[ 1] = m[ 1] + m[ 5];
1381   d[ 2] = m[ 2] + m[ 6];
1382   d[ 3] = m[ 3] + m[ 7];
1383   d[ 4] = m[ 8] + m[12];
1384   d[ 5] = m[ 9] + m[13];
1385   d[ 6] = m[10] + m[14];
1386   d[ 7] = m[11] + m[15];
1387   d[ 8] = m[ 0] - m[ 4];
1388   d[ 9] = m[ 1] - m[ 5];
1389   d[10] = m[ 2] - m[ 6];
1390   d[11] = m[ 3] - m[ 7];
1391   d[12] = m[12] - m[ 8];
1392   d[13] = m[13] - m[ 9];
1393   d[14] = m[14] - m[10];
1394   d[15] = m[15] - m[11];
1395 
1396   m[ 0] = d[ 0] + d[ 3];
1397   m[ 1] = d[ 1] + d[ 2];
1398   m[ 2] = d[ 1] - d[ 2];
1399   m[ 3] = d[ 0] - d[ 3];
1400   m[ 4] = d[ 4] + d[ 7];
1401   m[ 5] = d[ 5] + d[ 6];
1402   m[ 6] = d[ 5] - d[ 6];
1403   m[ 7] = d[ 4] - d[ 7];
1404   m[ 8] = d[ 8] + d[11];
1405   m[ 9] = d[ 9] + d[10];
1406   m[10] = d[ 9] - d[10];
1407   m[11] = d[ 8] - d[11];
1408   m[12] = d[12] + d[15];
1409   m[13] = d[13] + d[14];
1410   m[14] = d[13] - d[14];
1411   m[15] = d[12] - d[15];
1412 
1413   d[ 0] = m[ 0] + m[ 1];
1414   d[ 1] = m[ 0] - m[ 1];
1415   d[ 2] = m[ 2] + m[ 3];
1416   d[ 3] = m[ 3] - m[ 2];
1417   d[ 4] = m[ 4] + m[ 5];
1418   d[ 5] = m[ 4] - m[ 5];
1419   d[ 6] = m[ 6] + m[ 7];
1420   d[ 7] = m[ 7] - m[ 6];
1421   d[ 8] = m[ 8] + m[ 9];
1422   d[ 9] = m[ 8] - m[ 9];
1423   d[10] = m[10] + m[11];
1424   d[11] = m[11] - m[10];
1425   d[12] = m[12] + m[13];
1426   d[13] = m[12] - m[13];
1427   d[14] = m[14] + m[15];
1428   d[15] = m[15] - m[14];
1429 
1430   for (k=0; k<16; ++k)
1431   {
1432     satd += abs(d[k]);
1433   }
1434   satd = ((satd+1)>>1);
1435 
1436   return satd;
1437 }
1438 
xCalcHADs8x8(Pel * piOrg,Pel * piCur,Int iStrideOrg,Int iStrideCur,Int iStep)1439 Distortion TComRdCost::xCalcHADs8x8( Pel *piOrg, Pel *piCur, Int iStrideOrg, Int iStrideCur, Int iStep )
1440 {
1441   Int k, i, j, jj;
1442   Distortion sad = 0;
1443   TCoeff diff[64], m1[8][8], m2[8][8], m3[8][8];
1444   assert( iStep == 1 );
1445   for( k = 0; k < 64; k += 8 )
1446   {
1447     diff[k+0] = piOrg[0] - piCur[0];
1448     diff[k+1] = piOrg[1] - piCur[1];
1449     diff[k+2] = piOrg[2] - piCur[2];
1450     diff[k+3] = piOrg[3] - piCur[3];
1451     diff[k+4] = piOrg[4] - piCur[4];
1452     diff[k+5] = piOrg[5] - piCur[5];
1453     diff[k+6] = piOrg[6] - piCur[6];
1454     diff[k+7] = piOrg[7] - piCur[7];
1455 
1456     piCur += iStrideCur;
1457     piOrg += iStrideOrg;
1458   }
1459 
1460   //horizontal
1461   for (j=0; j < 8; j++)
1462   {
1463     jj = j << 3;
1464     m2[j][0] = diff[jj  ] + diff[jj+4];
1465     m2[j][1] = diff[jj+1] + diff[jj+5];
1466     m2[j][2] = diff[jj+2] + diff[jj+6];
1467     m2[j][3] = diff[jj+3] + diff[jj+7];
1468     m2[j][4] = diff[jj  ] - diff[jj+4];
1469     m2[j][5] = diff[jj+1] - diff[jj+5];
1470     m2[j][6] = diff[jj+2] - diff[jj+6];
1471     m2[j][7] = diff[jj+3] - diff[jj+7];
1472 
1473     m1[j][0] = m2[j][0] + m2[j][2];
1474     m1[j][1] = m2[j][1] + m2[j][3];
1475     m1[j][2] = m2[j][0] - m2[j][2];
1476     m1[j][3] = m2[j][1] - m2[j][3];
1477     m1[j][4] = m2[j][4] + m2[j][6];
1478     m1[j][5] = m2[j][5] + m2[j][7];
1479     m1[j][6] = m2[j][4] - m2[j][6];
1480     m1[j][7] = m2[j][5] - m2[j][7];
1481 
1482     m2[j][0] = m1[j][0] + m1[j][1];
1483     m2[j][1] = m1[j][0] - m1[j][1];
1484     m2[j][2] = m1[j][2] + m1[j][3];
1485     m2[j][3] = m1[j][2] - m1[j][3];
1486     m2[j][4] = m1[j][4] + m1[j][5];
1487     m2[j][5] = m1[j][4] - m1[j][5];
1488     m2[j][6] = m1[j][6] + m1[j][7];
1489     m2[j][7] = m1[j][6] - m1[j][7];
1490   }
1491 
1492   //vertical
1493   for (i=0; i < 8; i++)
1494   {
1495     m3[0][i] = m2[0][i] + m2[4][i];
1496     m3[1][i] = m2[1][i] + m2[5][i];
1497     m3[2][i] = m2[2][i] + m2[6][i];
1498     m3[3][i] = m2[3][i] + m2[7][i];
1499     m3[4][i] = m2[0][i] - m2[4][i];
1500     m3[5][i] = m2[1][i] - m2[5][i];
1501     m3[6][i] = m2[2][i] - m2[6][i];
1502     m3[7][i] = m2[3][i] - m2[7][i];
1503 
1504     m1[0][i] = m3[0][i] + m3[2][i];
1505     m1[1][i] = m3[1][i] + m3[3][i];
1506     m1[2][i] = m3[0][i] - m3[2][i];
1507     m1[3][i] = m3[1][i] - m3[3][i];
1508     m1[4][i] = m3[4][i] + m3[6][i];
1509     m1[5][i] = m3[5][i] + m3[7][i];
1510     m1[6][i] = m3[4][i] - m3[6][i];
1511     m1[7][i] = m3[5][i] - m3[7][i];
1512 
1513     m2[0][i] = m1[0][i] + m1[1][i];
1514     m2[1][i] = m1[0][i] - m1[1][i];
1515     m2[2][i] = m1[2][i] + m1[3][i];
1516     m2[3][i] = m1[2][i] - m1[3][i];
1517     m2[4][i] = m1[4][i] + m1[5][i];
1518     m2[5][i] = m1[4][i] - m1[5][i];
1519     m2[6][i] = m1[6][i] + m1[7][i];
1520     m2[7][i] = m1[6][i] - m1[7][i];
1521   }
1522 
1523   for (i = 0; i < 8; i++)
1524   {
1525     for (j = 0; j < 8; j++)
1526     {
1527       sad += abs(m2[i][j]);
1528     }
1529   }
1530 
1531   sad=((sad+2)>>2);
1532 
1533   return sad;
1534 }
1535 
1536 
xGetHADs(DistParam * pcDtParam)1537 Distortion TComRdCost::xGetHADs( DistParam* pcDtParam )
1538 {
1539   if ( pcDtParam->bApplyWeight )
1540   {
1541     return TComRdCostWeightPrediction::xGetHADsw( pcDtParam );
1542   }
1543   Pel* piOrg   = pcDtParam->pOrg;
1544   Pel* piCur   = pcDtParam->pCur;
1545   Int  iRows   = pcDtParam->iRows;
1546   Int  iCols   = pcDtParam->iCols;
1547   Int  iStrideCur = pcDtParam->iStrideCur;
1548   Int  iStrideOrg = pcDtParam->iStrideOrg;
1549   Int  iStep  = pcDtParam->iStep;
1550 
1551   Int  x, y;
1552 
1553   Distortion uiSum = 0;
1554 
1555   if( ( iRows % 8 == 0) && (iCols % 8 == 0) )
1556   {
1557     Int  iOffsetOrg = iStrideOrg<<3;
1558     Int  iOffsetCur = iStrideCur<<3;
1559     for ( y=0; y<iRows; y+= 8 )
1560     {
1561       for ( x=0; x<iCols; x+= 8 )
1562       {
1563         uiSum += xCalcHADs8x8( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1564       }
1565       piOrg += iOffsetOrg;
1566       piCur += iOffsetCur;
1567     }
1568   }
1569   else if( ( iRows % 4 == 0) && (iCols % 4 == 0) )
1570   {
1571     Int  iOffsetOrg = iStrideOrg<<2;
1572     Int  iOffsetCur = iStrideCur<<2;
1573 
1574     for ( y=0; y<iRows; y+= 4 )
1575     {
1576       for ( x=0; x<iCols; x+= 4 )
1577       {
1578         uiSum += xCalcHADs4x4( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1579       }
1580       piOrg += iOffsetOrg;
1581       piCur += iOffsetCur;
1582     }
1583   }
1584   else if( ( iRows % 2 == 0) && (iCols % 2 == 0) )
1585   {
1586     Int  iOffsetOrg = iStrideOrg<<1;
1587     Int  iOffsetCur = iStrideCur<<1;
1588     for ( y=0; y<iRows; y+=2 )
1589     {
1590       for ( x=0; x<iCols; x+=2 )
1591       {
1592         uiSum += xCalcHADs2x2( &piOrg[x], &piCur[x*iStep], iStrideOrg, iStrideCur, iStep );
1593       }
1594       piOrg += iOffsetOrg;
1595       piCur += iOffsetCur;
1596     }
1597   }
1598   else
1599   {
1600     assert(false);
1601   }
1602 
1603   return ( uiSum >> DISTORTION_PRECISION_ADJUSTMENT(pcDtParam->bitDepth-8) );
1604 }
1605 
1606 //! \}
1607