1 /*!
2  * \copy
3  *     Copyright (c)  2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 #include "AdaptiveQuantization.h"
33 #include "macros.h"
34 WELSVP_NAMESPACE_BEGIN
35 
36 
37 
// Integer model constants consumed by CAdaptiveQuantization::Process.
// AVERAGE_TIME_* and MODEL_TIME are multiplied into values that Process afterwards divides by
// AQ_TIME_INT_MULTIPLY; MODEL_ALPHA is added to a ratio that Process has multiplied by
// AQ_TIME_INT_MULTIPLY. They therefore act as fixed-point factors in that scale.
// NOTE(review): the figures in the trailing comments (0.3046875, 0.5, 1.5, 9.0, ...) look like
// earlier floating-point values and do not obviously correspond to the integers used here --
// confirm against the definition of AQ_TIME_INT_MULTIPLY before relying on them.
#define AVERAGE_TIME_MOTION                   (3000) //0.3046875 // 1/4 + 1/16 - 1/128 ~ 0.3 *AQ_TIME_INT_MULTIPLY
#define AVERAGE_TIME_TEXTURE_QUALITYMODE  (10000) //0.5 // 1/2 *AQ_TIME_INT_MULTIPLY
#define AVERAGE_TIME_TEXTURE_BITRATEMODE  (8750) //0.5 // 1/2 *AQ_TIME_INT_MULTIPLY
#define MODEL_ALPHA                           (9910) //1.5 //1.1102 *AQ_TIME_INT_MULTIPLY
#define MODEL_TIME                            (58185) //9.0 //5.9842 *AQ_TIME_INT_MULTIPLY
43 
44 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////
45 
CAdaptiveQuantization(int32_t iCpuFlag)46 CAdaptiveQuantization::CAdaptiveQuantization (int32_t iCpuFlag) {
47   m_CPUFlag = iCpuFlag;
48   m_eMethod   = METHOD_ADAPTIVE_QUANT;
49   m_pfVar   = NULL;
50   WelsMemset (&m_sAdaptiveQuantParam, 0, sizeof (m_sAdaptiveQuantParam));
51   WelsInitVarFunc (m_pfVar, m_CPUFlag);
52 }
53 
~CAdaptiveQuantization()54 CAdaptiveQuantization::~CAdaptiveQuantization() {
55 }
56 
Process(int32_t iType,SPixMap * pSrcPixMap,SPixMap * pRefPixMap)57 EResult CAdaptiveQuantization::Process (int32_t iType, SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
58   EResult eReturn = RET_INVALIDPARAM;
59 
60   int32_t iWidth     = pSrcPixMap->sRect.iRectWidth;
61   int32_t iHeight    = pSrcPixMap->sRect.iRectHeight;
62   int32_t iMbWidth  = iWidth  >> 4;
63   int32_t iMbHeight = iHeight >> 4;
64   int32_t iMbTotalNum    = iMbWidth * iMbHeight;
65 
66   SMotionTextureUnit* pMotionTexture = NULL;
67   SVAACalcResult*     pVaaCalcResults = NULL;
68   int32_t   iMotionTextureIndexToDeltaQp = 0;
69   int32_t iAverMotionTextureIndexToDeltaQp = 0;  // double to uint32
70   int64_t iAverageMotionIndex = 0;      // double to float
71   int64_t iAverageTextureIndex = 0;
72 
73   int64_t iQStep = 0;
74   int64_t iLumaMotionDeltaQp = 0;
75   int64_t iLumaTextureDeltaQp = 0;
76 
77   uint8_t* pRefFrameY = NULL, *pCurFrameY = NULL;
78   int32_t iRefStride = 0, iCurStride = 0;
79 
80   uint8_t* pRefFrameTmp = NULL, *pCurFrameTmp = NULL;
81   int32_t i = 0, j = 0;
82 
83   pRefFrameY = (uint8_t*)pRefPixMap->pPixel[0];
84   pCurFrameY = (uint8_t*)pSrcPixMap->pPixel[0];
85 
86   iRefStride  = pRefPixMap->iStride[0];
87   iCurStride  = pSrcPixMap->iStride[0];
88 
89   /////////////////////////////////////// motion //////////////////////////////////
90   //  motion MB residual variance
91   iAverageMotionIndex = 0;
92   iAverageTextureIndex = 0;
93   pMotionTexture = m_sAdaptiveQuantParam.pMotionTextureUnit;
94   pVaaCalcResults = m_sAdaptiveQuantParam.pCalcResult;
95 
96   if (pVaaCalcResults->pRefY == pRefFrameY && pVaaCalcResults->pCurY == pCurFrameY) {
97     int32_t iMbIndex = 0;
98     int32_t iSumDiff, iSQDiff, uiSum, iSQSum;
99     for (j = 0; j < iMbHeight; j ++) {
100       pRefFrameTmp  = pRefFrameY;
101       pCurFrameTmp  = pCurFrameY;
102       for (i = 0; i < iMbWidth; i++) {
103         iSumDiff =  pVaaCalcResults->pSad8x8[iMbIndex][0];
104         iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][1];
105         iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][2];
106         iSumDiff += pVaaCalcResults->pSad8x8[iMbIndex][3];
107 
108         iSQDiff = pVaaCalcResults->pSsd16x16[iMbIndex];
109         uiSum = pVaaCalcResults->pSum16x16[iMbIndex];
110         iSQSum = pVaaCalcResults->pSumOfSquare16x16[iMbIndex];
111 
112         iSumDiff = iSumDiff >> 8;
113         pMotionTexture->uiMotionIndex = (iSQDiff >> 8) - (iSumDiff * iSumDiff);
114 
115         uiSum = uiSum >> 8;
116         pMotionTexture->uiTextureIndex = (iSQSum >> 8) - (uiSum * uiSum);
117 
118         iAverageMotionIndex += pMotionTexture->uiMotionIndex;
119         iAverageTextureIndex += pMotionTexture->uiTextureIndex;
120         pMotionTexture++;
121         ++iMbIndex;
122         pRefFrameTmp += MB_WIDTH_LUMA;
123         pCurFrameTmp += MB_WIDTH_LUMA;
124       }
125       pRefFrameY += (iRefStride) << 4;
126       pCurFrameY += (iCurStride) << 4;
127     }
128   } else {
129     for (j = 0; j < iMbHeight; j ++) {
130       pRefFrameTmp  = pRefFrameY;
131       pCurFrameTmp  = pCurFrameY;
132       for (i = 0; i < iMbWidth; i++) {
133         m_pfVar (pRefFrameTmp, iRefStride, pCurFrameTmp, iCurStride, pMotionTexture);
134         iAverageMotionIndex += pMotionTexture->uiMotionIndex;
135         iAverageTextureIndex += pMotionTexture->uiTextureIndex;
136         pMotionTexture++;
137         pRefFrameTmp += MB_WIDTH_LUMA;
138         pCurFrameTmp += MB_WIDTH_LUMA;
139 
140       }
141       pRefFrameY += (iRefStride) << 4;
142       pCurFrameY += (iCurStride) << 4;
143     }
144   }
145   iAverageMotionIndex = WELS_DIV_ROUND64 (iAverageMotionIndex * AQ_INT_MULTIPLY, iMbTotalNum);
146   iAverageTextureIndex = WELS_DIV_ROUND64 (iAverageTextureIndex * AQ_INT_MULTIPLY, iMbTotalNum);
147   if ((iAverageMotionIndex <= AQ_PESN) && (iAverageMotionIndex >= -AQ_PESN)) {
148     iAverageMotionIndex = AQ_INT_MULTIPLY;
149   }
150   if ((iAverageTextureIndex <= AQ_PESN) && (iAverageTextureIndex >= -AQ_PESN)) {
151     iAverageTextureIndex = AQ_INT_MULTIPLY;
152   }
153   //  motion mb residual map to QP
154   //  texture mb original map to QP
155   iAverMotionTextureIndexToDeltaQp = 0;
156   iAverageMotionIndex = WELS_DIV_ROUND64 (AVERAGE_TIME_MOTION * iAverageMotionIndex, AQ_TIME_INT_MULTIPLY);
157 
158   if (m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE) {
159     iAverageTextureIndex = WELS_DIV_ROUND64 (AVERAGE_TIME_TEXTURE_QUALITYMODE * iAverageTextureIndex, AQ_TIME_INT_MULTIPLY);
160   } else {
161     iAverageTextureIndex = WELS_DIV_ROUND64 (AVERAGE_TIME_TEXTURE_BITRATEMODE * iAverageTextureIndex, AQ_TIME_INT_MULTIPLY);
162   }
163 
164   int64_t iAQ_EPSN = - ((int64_t)AQ_PESN * AQ_TIME_INT_MULTIPLY * AQ_QSTEP_INT_MULTIPLY / AQ_INT_MULTIPLY);
165   pMotionTexture = m_sAdaptiveQuantParam.pMotionTextureUnit;
166   for (j = 0; j < iMbHeight; j ++) {
167     for (i = 0; i < iMbWidth; i++) {
168       int64_t a = WELS_DIV_ROUND64 ((int64_t) (pMotionTexture->uiTextureIndex) * AQ_INT_MULTIPLY * AQ_TIME_INT_MULTIPLY,
169                                     iAverageTextureIndex);
170       iQStep = WELS_DIV_ROUND64 ((a - AQ_TIME_INT_MULTIPLY) * AQ_QSTEP_INT_MULTIPLY, (a + MODEL_ALPHA));
171       iLumaTextureDeltaQp = MODEL_TIME * iQStep;// range +- 6
172 
173       iMotionTextureIndexToDeltaQp = ((int32_t) (iLumaTextureDeltaQp / (AQ_TIME_INT_MULTIPLY)));
174 
175       a = WELS_DIV_ROUND64 (((int64_t)pMotionTexture->uiMotionIndex) * AQ_INT_MULTIPLY * AQ_TIME_INT_MULTIPLY,
176                             iAverageMotionIndex);
177       iQStep = WELS_DIV_ROUND64 ((a - AQ_TIME_INT_MULTIPLY) * AQ_QSTEP_INT_MULTIPLY, (a + MODEL_ALPHA));
178       iLumaMotionDeltaQp = MODEL_TIME * iQStep;// range +- 6
179 
180       if ((m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_QUALITY_MODE && iLumaMotionDeltaQp < iAQ_EPSN)
181           || (m_sAdaptiveQuantParam.iAdaptiveQuantMode == AQ_BITRATE_MODE)) {
182         iMotionTextureIndexToDeltaQp += ((int32_t) (iLumaMotionDeltaQp / (AQ_TIME_INT_MULTIPLY)));
183       }
184 
185       m_sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp[j * iMbWidth + i] = (int8_t) (iMotionTextureIndexToDeltaQp /
186           AQ_QSTEP_INT_MULTIPLY);
187       iAverMotionTextureIndexToDeltaQp += iMotionTextureIndexToDeltaQp;
188       pMotionTexture++;
189     }
190   }
191 
192   m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp = iAverMotionTextureIndexToDeltaQp / iMbTotalNum;
193 
194   eReturn = RET_SUCCESS;
195 
196   return eReturn;
197 }
198 
199 
200 
Set(int32_t iType,void * pParam)201 EResult CAdaptiveQuantization::Set (int32_t iType, void* pParam) {
202   if (pParam == NULL) {
203     return RET_INVALIDPARAM;
204   }
205 
206   m_sAdaptiveQuantParam = * (SAdaptiveQuantizationParam*)pParam;
207 
208   return RET_SUCCESS;
209 }
210 
Get(int32_t iType,void * pParam)211 EResult CAdaptiveQuantization::Get (int32_t iType, void* pParam) {
212   if (pParam == NULL) {
213     return RET_INVALIDPARAM;
214   }
215 
216   SAdaptiveQuantizationParam* sAdaptiveQuantParam = (SAdaptiveQuantizationParam*)pParam;
217 
218   sAdaptiveQuantParam->iAverMotionTextureIndexToDeltaQp = m_sAdaptiveQuantParam.iAverMotionTextureIndexToDeltaQp;
219 
220   return RET_SUCCESS;
221 }
222 
223 ///////////////////////////////////////////////////////////////////////////////////////////////
224 
WelsInitVarFunc(PVarFunc & pfVar,int32_t iCpuFlag)225 void CAdaptiveQuantization::WelsInitVarFunc (PVarFunc& pfVar,  int32_t iCpuFlag) {
226   pfVar = SampleVariance16x16_c;
227 
228 #ifdef X86_ASM
229   if (iCpuFlag & WELS_CPU_SSE2) {
230     pfVar = SampleVariance16x16_sse2;
231   }
232 #endif
233 #ifdef HAVE_NEON
234   if (iCpuFlag & WELS_CPU_NEON) {
235     pfVar = SampleVariance16x16_neon;
236   }
237 #endif
238 #ifdef HAVE_NEON_AARCH64
239   if (iCpuFlag & WELS_CPU_NEON) {
240     pfVar = SampleVariance16x16_AArch64_neon;
241   }
242 #endif
243 }
244 
SampleVariance16x16_c(uint8_t * pRefY,int32_t iRefStride,uint8_t * pSrcY,int32_t iSrcStride,SMotionTextureUnit * pMotionTexture)245 void SampleVariance16x16_c (uint8_t* pRefY, int32_t iRefStride, uint8_t* pSrcY, int32_t iSrcStride,
246                             SMotionTextureUnit* pMotionTexture) {
247   uint32_t uiCurSquare = 0,  uiSquare = 0;
248   uint16_t uiCurSum = 0,  uiSum = 0;
249 
250   for (int32_t y = 0; y < MB_WIDTH_LUMA; y++) {
251     for (int32_t x = 0; x < MB_WIDTH_LUMA; x++) {
252       uint32_t uiDiff = WELS_ABS (pRefY[x] - pSrcY[x]);
253       uiSum += uiDiff;
254       uiSquare += uiDiff * uiDiff;
255 
256       uiCurSum += pSrcY[x];
257       uiCurSquare += pSrcY[x] * pSrcY[x];
258     }
259     pRefY += iRefStride;
260     pSrcY += iSrcStride;
261   }
262 
263   uiSum = uiSum >> 8;
264   pMotionTexture->uiMotionIndex = (uiSquare >> 8) - (uiSum * uiSum);
265 
266   uiCurSum = uiCurSum >> 8;
267   pMotionTexture->uiTextureIndex = (uiCurSquare >> 8) - (uiCurSum * uiCurSum);
268 }
269 
270 WELSVP_NAMESPACE_END
271