1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file    deblocking.c
33  *
34  * \brief   Interfaces introduced in frame deblocking filtering
35  *
36  * \date    08/03/2009 Created
37  *
38  *************************************************************************************
39  */
40 
41 #include "deblocking.h"
42 #include "cpu_core.h"
43 
44 namespace WelsEnc {
45 
// Call-style accessors for the lookup tables defined further down in this file.
// A function-like macro is only expanded when its name is followed by '(', so the
// array definitions themselves and plain `name[...]` subscripts are not affected.
#define g_kuiAlphaTable(iIdx) g_kuiAlphaTable[(iIdx)]
#define g_kiBetaTable(iIdx)   g_kiBetaTable[(iIdx)]
#define g_kiTc0Table(iIdx)    g_kiTc0Table[(iIdx)]
49 
// Motion-vector test used when deriving the boundary strength across a macroblock
// edge. Evaluates to non-zero when entry uiBIdx of sCurMv and entry uiBnIdx of
// sNeighMv differ by 4 or more in iMvX or in iMvY, and to 0 otherwise.
// All arguments are parenthesized so that expressions such as `pMv + 1` can be
// passed safely; the arguments are expanded twice, so they must be free of side
// effects.
#define MB_BS_MV(sCurMv, sNeighMv, uiBIdx, uiBnIdx) \
  (\
  ( WELS_ABS( (sCurMv)[(uiBIdx)].iMvX - (sNeighMv)[(uiBnIdx)].iMvX ) >= 4 ) ||\
  ( WELS_ABS( (sCurMv)[(uiBIdx)].iMvY - (sNeighMv)[(uiBnIdx)].iMvY ) >= 4 )\
  )
55 
// Motion-vector test for an edge between two blocks of the same macroblock.
// Evaluates to 1 when entries uiBIdx and uiBnIdx of sMotionVector differ by 4 or
// more in iMvX or iMvY (any bit above the low two bits of the absolute difference
// is set), and to 0 otherwise.
// uiRefIndex is accepted for signature symmetry but is never expanded.
// The remaining arguments are parenthesized so that pointer expressions can be
// passed; they are expanded more than once and must be free of side effects.
#define SMB_EDGE_MV(uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) \
  (\
  !!((WELS_ABS((sMotionVector)[(uiBIdx)].iMvX - (sMotionVector)[(uiBnIdx)].iMvX) &(~3)) | (WELS_ABS((sMotionVector)[(uiBIdx)].iMvY - (sMotionVector)[(uiBnIdx)].iMvY) &(~3)))\
  )
60 
// Boundary strength of an edge inside a macroblock.
//   bsx1 != 0 : result is 2
//   bsx1 == 0 : result is SMB_EDGE_MV(...), i.e. 1 on a motion-vector mismatch,
//               otherwise 0
// For bsx1 in {0, 1} this matches the former shift-based form
// ((bsx1 | mv) << (bsx1 ? 1 : 0)). Other non-zero values of bsx1 now also give 2
// instead of a value of 4 or more, which would index past the four columns of the
// tc0 table. bsx1 is parenthesized and expanded only once.
#define BS_EDGE(bsx1, uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) \
  ( (bsx1) ? 2 : SMB_EDGE_MV(uiRefIndex, sMotionVector, uiBIdx, uiBnIdx) )
63 
// Derives the filter thresholds for a given QP.
//   iIdexA (out) : QP + iAlphaOffset clipped by CLIP3_QP_0_51; also the row used
//                  later for the tc0 lookup
//   iAlpha (out) : g_kuiAlphaTable entry at iIdexA
//   iBeta  (out) : g_kiBetaTable entry at QP + iBetaOffset, clipped the same way
// Wrapped in do { } while (0) so that the invocation plus its trailing ';' forms
// exactly one statement (safe inside an unbraced if/else). QP and the offsets are
// parenthesized and expanded more than once, so they must be free of side effects.
#define GET_ALPHA_BETA_FROM_QP(QP, iAlphaOffset, iBetaOffset, iIdexA, iAlpha, iBeta) \
do {\
  (iIdexA) = ((QP) + (iAlphaOffset));\
  (iIdexA) = CLIP3_QP_0_51(iIdexA);\
  (iAlpha) = g_kuiAlphaTable(iIdexA);\
  (iBeta)  = g_kiBetaTable((CLIP3_QP_0_51((QP) + (iBetaOffset))));\
} while (0)
71 
// Alpha threshold table; refers to Table 8-16 in the H.264/AVC standard.
// GET_ALPHA_BETA_FROM_QP reads it at an index already clipped by CLIP3_QP_0_51.
// The array has 52 + 12 entries: positions 52..63 repeat the last value (255).
static const uint8_t g_kuiAlphaTable[52 + 12] = {
  //  0 .. 15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 16 .. 31
  4, 4, 5, 6, 7, 8, 9, 10, 12, 13, 15, 17, 20, 22, 25, 28,
  // 32 .. 47
  32, 36, 40, 45, 50, 56, 63, 71, 80, 90, 101, 113, 127, 144, 162, 182,
  // 48 .. 51
  203, 226, 255, 255,
  // 52 .. 63 (padding)
  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
};
81 
// Beta threshold table; refers to Table 8-16 in the H.264/AVC standard.
// GET_ALPHA_BETA_FROM_QP reads it at CLIP3_QP_0_51 (QP + beta offset).
// The array has 52 + 12 entries: positions 52..63 repeat the last value (18).
static const int8_t g_kiBetaTable[52 + 12] = {
  //  0 .. 15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 16 .. 31
  2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 8, 8,
  // 32 .. 47
  9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16,
  // 48 .. 51
  17, 17, 18, 18,
  // 52 .. 63 (padding)
  18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18
};
91 
// tc0 table; refers to Table 8-17 in the H.264/AVC standard.
// The first index is the clipped table index produced by GET_ALPHA_BETA_FROM_QP;
// the second index is the boundary-strength byte (see TC0_TBL_LOOKUP). Column 0
// holds -1 in every row. Rows 52..63 repeat row 51.
static const int8_t g_kiTc0Table[52 + 12][4] = {
  /*  0 */ { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
  /*  4 */ { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
  /*  8 */ { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
  /* 12 */ { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
  /* 16 */ { -1, 0, 0, 0 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 },
  /* 20 */ { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 },
  /* 24 */ { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 },
  /* 28 */ { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 2, 3 },
  /* 32 */ { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 },
  /* 36 */ { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 },
  /* 40 */ { -1, 4, 5, 7 }, { -1, 4, 5, 8 }, { -1, 4, 6, 9 }, { -1, 5, 7, 10 },
  /* 44 */ { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 },
  /* 48 */ { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 },
  /* 52 */ { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 },
  /* 56 */ { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 },
  /* 60 */ { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
};
105 
// Block-index pairs used by DeblockingBSMarginalMBAvcbase; the row is selected by
// iEdge (0 is passed for the left neighbour, 1 for the top neighbour).
// In each row, entries 0..3 index into the current macroblock and entries 4..7
// are the matching indices into the neighbouring macroblock.
static const uint8_t g_kuiTableBIdx[2][8] = {
  // iEdge 0:  current MB      | neighbour MB
  { 0,  4,  8,  12,              3,  7,  11, 15 },
  // iEdge 1:  current MB      | neighbour MB
  { 0,  1,  2,  3,               12, 13, 14, 15 },
};
117 
// Fills iTc[0..3] with the tc0 values for the four boundary-strength bytes
// pBS[0..3]: each entry is row iIdexA of the tc0 table, column pBS[k], plus
// bchroma (callers in this file pass 0 for luma and 1 for chroma).
// Wrapped in do { } while (0) so that the invocation plus its trailing ';' forms
// exactly one statement; iTc, pBS and bchroma are parenthesized so that pointer
// expressions can be passed. Arguments are expanded four times and must be free
// of side effects.
#define TC0_TBL_LOOKUP(iTc, iIdexA, pBS, bchroma) \
do {\
  (iTc)[0] = g_kiTc0Table(iIdexA)[(pBS)[0]] + (bchroma);\
  (iTc)[1] = g_kiTc0Table(iIdexA)[(pBS)[1]] + (bchroma);\
  (iTc)[2] = g_kiTc0Table(iIdexA)[(pBS)[2]] + (bchroma);\
  (iTc)[3] = g_kiTc0Table(iIdexA)[(pBS)[3]] + (bchroma);\
} while (0)
125 
/*!
 * \brief   Boundary strengths of the inner edges of a macroblock, derived from the
 *          non-zero-coefficient flags only.
 *
 * pNnzTab is read as a 4x4 raster of per-block flags (index = row * 4 + column).
 * For every inner edge the flags of the two adjacent blocks are OR-ed and shifted
 * left by iLShiftFactor:
 *   uiBS[0][e][r] - edge between columns e-1 and e (e = 1..3) on row r
 *   uiBS[1][e][c] - edge between rows e-1 and e (e = 1..3) on column c
 * uiBS[0][0] and uiBS[1][0] are not written.
 *
 * Each strength byte is computed on its own. This avoids reading the int8_t table
 * and writing the uint8_t result through uint32_t pointers (a strict-aliasing and
 * alignment hazard) and keeps shifted-out bits from leaking into the neighbouring
 * strength byte. For flags that are 0 or 1 the result equals the former word-wise
 * computation.
 *
 * \param   pNnzTab        16 non-zero-coefficient flags of the macroblock
 * \param   uiBS           boundary-strength array to fill
 * \param   iLShiftFactor  left shift applied to every OR-ed flag pair
 */
inline void DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t uiBS[2][4][4], int32_t iLShiftFactor) {
  for (int32_t iRow = 0; iRow < 4; iRow++) {
    for (int32_t iCol = 0; iCol < 4; iCol++) {
      const int32_t kiIdx = (iRow << 2) + iCol;

      if (iCol > 0) { // edge shared with the block to the left
        uiBS[0][iCol][iRow] = (pNnzTab[kiIdx - 1] | pNnzTab[kiIdx]) << iLShiftFactor;
      }
      if (iRow > 0) { // edge shared with the block above
        uiBS[1][iRow][iCol] = (pNnzTab[kiIdx - 4] | pNnzTab[kiIdx]) << iLShiftFactor;
      }
    }
  }
}
154 
DeblockingBSInsideMBNormal(SMB * pCurMb,uint8_t uiBS[2][4][4],int8_t * pNnzTab)155 void inline DeblockingBSInsideMBNormal (SMB* pCurMb, uint8_t uiBS[2][4][4], int8_t* pNnzTab) {
156   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
157   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
158 
159   uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
160   uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
161   uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
162   uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
163 
164   for (int i = 0; i < 3; i++)
165     uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
166   uiBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 1, 0);
167   uiBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 2, 1);
168   uiBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 3, 2);
169 
170   for (int i = 0; i < 3; i++)
171     uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
172   uiBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 5, 4);
173   uiBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 6, 5);
174   uiBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 7, 6);
175 
176   for (int i = 0; i < 3; i++)
177     uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
178   uiBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 9, 8);
179   uiBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 10, 9);
180   uiBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 11, 10);
181 
182   for (int i = 0; i < 3; i++)
183     uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
184   uiBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 13, 12);
185   uiBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 14, 13);
186   uiBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 15, 14);
187 
188   //horizontal
189   * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
190   uiBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 4, 0);
191   uiBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 5, 1);
192   uiBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 6, 2);
193   uiBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 7, 3);
194 
195   * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
196   uiBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 8, 4);
197   uiBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 9, 5);
198   uiBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 10, 6);
199   uiBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 11, 7);
200 
201   * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
202   uiBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIdx, pCurMb->sMv, 12, 8);
203   uiBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIdx, pCurMb->sMv, 13, 9);
204   uiBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIdx, pCurMb->sMv, 14, 10);
205   uiBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIdx, pCurMb->sMv, 15, 11);
206 }
207 
DeblockingBSMarginalMBAvcbase(SMB * pCurMb,SMB * pNeighMb,int32_t iEdge)208 uint32_t DeblockingBSMarginalMBAvcbase (SMB* pCurMb, SMB* pNeighMb, int32_t iEdge) {
209   int32_t i;
210   uint32_t uiBSx4;
211   uint8_t* pBS = (uint8_t*) (&uiBSx4);
212   const uint8_t* pBIdx  = &g_kuiTableBIdx[iEdge][0];
213   const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
214 
215 
216   for (i = 0; i < 4; i++) {
217     if (pCurMb->pNonZeroCount[*pBIdx] | pNeighMb->pNonZeroCount[*pBnIdx]) {
218       pBS[i] = 2;
219     } else {
220       pBS[i] =
221 #ifndef SINGLE_REF_FRAME
222         (pCurMb->uiRefIndex[g_kiTableBlock8x8Idx[1][iEdge][i]] - pNeighMb->uiRefIndex[g_kiTableBlock8x8NIdx[1][iEdge][i]]) ||
223 #endif
224         MB_BS_MV (pCurMb->sMv, pNeighMb->sMv, *pBIdx, *pBnIdx);
225     }
226     pBIdx++;
227     pBnIdx++;
228   }
229   return uiBSx4;
230 }
231 
FilteringEdgeLumaH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)232 void FilteringEdgeLumaH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
233                          uint8_t* pBS) {
234   int32_t iIdexA;
235   int32_t iAlpha;
236   int32_t iBeta;
237   ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
238 
239   GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
240                           iBeta);
241 
242   if (iAlpha | iBeta) {
243     TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 0);
244     pfDeblocking->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, iTc);
245   }
246   return;
247 }
FilteringEdgeLumaV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)248 void FilteringEdgeLumaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
249                          uint8_t* pBS) {
250   int32_t  iIdexA;
251   int32_t  iAlpha;
252   int32_t  iBeta;
253   ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
254 
255   GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
256                           iBeta);
257 
258   if (iAlpha | iBeta) {
259     TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 0);
260     pfDeblocking->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, iTc);
261   }
262   return;
263 }
264 
FilteringEdgeLumaIntraH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)265 void FilteringEdgeLumaIntraH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
266                               uint8_t* pBS) {
267   int32_t iIdexA;
268   int32_t iAlpha;
269   int32_t iBeta;
270 
271   GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
272                           iBeta);
273 
274   if (iAlpha | iBeta) {
275     pfDeblocking->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta);
276   }
277   return;
278 }
279 
FilteringEdgeLumaIntraV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPix,int32_t iStride,uint8_t * pBS)280 void FilteringEdgeLumaIntraV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride,
281                               uint8_t* pBS) {
282   int32_t iIdexA;
283   int32_t iAlpha;
284   int32_t iBeta;
285 
286   GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
287                           iBeta);
288 
289   if (iAlpha | iBeta) {
290     pfDeblocking->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta);
291   }
292   return;
293 }
FilteringEdgeChromaH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)294 void FilteringEdgeChromaH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr,
295                            int32_t iStride, uint8_t* pBS) {
296   int32_t iIdexA;
297   int32_t iAlpha;
298   int32_t iBeta;
299   ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
300 
301   GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
302                           iBeta);
303 
304   if (iAlpha | iBeta) {
305     TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1);
306     pfDeblocking->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc);
307   }
308   return;
309 }
FilteringEdgeChromaV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)310 void FilteringEdgeChromaV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr,
311                            int32_t iStride, uint8_t* pBS) {
312   int32_t iIdexA;
313   int32_t iAlpha;
314   int32_t iBeta;
315   ENFORCE_STACK_ALIGN_1D (int8_t, iTc, 4, 16);
316 
317   GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
318                           iBeta);
319 
320   if (iAlpha | iBeta) {
321     TC0_TBL_LOOKUP (iTc, iIdexA, pBS, 1);
322     pfDeblocking->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, iTc);
323   }
324   return;
325 }
326 
FilteringEdgeChromaIntraH(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)327 void FilteringEdgeChromaIntraH (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb,
328                                 uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) {
329   int32_t iIdexA;
330   int32_t iAlpha;
331   int32_t iBeta;
332 
333   GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
334                           iBeta);
335 
336   if (iAlpha | iBeta) {
337     pfDeblocking->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta);
338   }
339   return;
340 }
341 
FilteringEdgeChromaIntraV(DeblockingFunc * pfDeblocking,SDeblockingFilter * pFilter,uint8_t * pPixCb,uint8_t * pPixCr,int32_t iStride,uint8_t * pBS)342 void FilteringEdgeChromaIntraV (DeblockingFunc* pfDeblocking, SDeblockingFilter* pFilter, uint8_t* pPixCb,
343                                 uint8_t* pPixCr, int32_t iStride, uint8_t* pBS) {
344   int32_t iIdexA;
345   int32_t iAlpha;
346   int32_t iBeta;
347 
348   GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
349                           iBeta);
350 
351   if (iAlpha | iBeta) {
352     pfDeblocking->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta);
353   }
354   return;
355 }
356 
DeblockingInterMb(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter,uint8_t uiBS[2][4][4])357 void DeblockingInterMb (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter, uint8_t uiBS[2][4][4]) {
358   int8_t iCurLumaQp   = pCurMb->uiLumaQp;
359   int8_t iCurChromaQp = pCurMb->uiChromaQp;
360   int32_t iLineSize     = pFilter->iCsStride[0];
361   int32_t iLineSizeUV   = pFilter->iCsStride[1];
362   int32_t iMbStride    = pFilter->iMbStride;
363 
364   int32_t iMbX = pCurMb->iMbX;
365   int32_t iMbY = pCurMb->iMbY;
366 
367   bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
368   bool bTopBsValid[2]  = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
369 
370   int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
371   int32_t iTopFlag  = bTopBsValid[pFilter->uiFilterIdc];
372 
373   uint8_t* pDestY, *pDestCb, *pDestCr;
374   pDestY  = pFilter->pCsData[0];
375   pDestCb = pFilter->pCsData[1];
376   pDestCr = pFilter->pCsData[2];
377 
378   if (iLeftFlag) {
379     pFilter->uiLumaQP   = (iCurLumaQp + (pCurMb - 1)->uiLumaQp + 1) >> 1;
380     pFilter->uiChromaQP = (iCurChromaQp + (pCurMb - 1)->uiChromaQp + 1) >> 1;
381 
382     if (uiBS[0][0][0] == 0x04) {
383       FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize , NULL);
384       FilteringEdgeChromaIntraV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
385     } else {
386       if (* (uint32_t*)uiBS[0][0] != 0) {
387         FilteringEdgeLumaV (pfDeblocking, pFilter, pDestY, iLineSize, uiBS[0][0]);
388         FilteringEdgeChromaV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, uiBS[0][0]);
389       }
390     }
391   }
392 
393   pFilter->uiLumaQP = iCurLumaQp;
394   pFilter->uiChromaQP = iCurChromaQp;
395 
396   if (* (uint32_t*)uiBS[0][1] != 0) {
397     FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[1 << 2], iLineSize, uiBS[0][1]);
398   }
399 
400   if (* (uint32_t*)uiBS[0][2] != 0) {
401     FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[2 << 2], iLineSize, uiBS[0][2]);
402     FilteringEdgeChromaV (pfDeblocking, pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, uiBS[0][2]);
403   }
404 
405   if (* (uint32_t*)uiBS[0][3] != 0) {
406     FilteringEdgeLumaV (pfDeblocking, pFilter, &pDestY[3 << 2], iLineSize, uiBS[0][3]);
407   }
408 
409   if (iTopFlag) {
410     pFilter->uiLumaQP = (iCurLumaQp + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1;
411     pFilter->uiChromaQP = (iCurChromaQp + (pCurMb - iMbStride)->uiChromaQp + 1) >> 1;
412 
413     if (uiBS[1][0][0] == 0x04) {
414       FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize , NULL);
415       FilteringEdgeChromaIntraH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
416     } else {
417       if (* (uint32_t*)uiBS[1][0] != 0) {
418         FilteringEdgeLumaH (pfDeblocking, pFilter, pDestY, iLineSize, uiBS[1][0]);
419         FilteringEdgeChromaH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSizeUV, uiBS[1][0]);
420       }
421     }
422   }
423 
424   pFilter->uiLumaQP = iCurLumaQp;
425   pFilter->uiChromaQP = iCurChromaQp;
426 
427   if (* (uint32_t*)uiBS[1][1] != 0) {
428     FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, uiBS[1][1]);
429   }
430 
431   if (* (uint32_t*)uiBS[1][2] != 0) {
432     FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, uiBS[1][2]);
433     FilteringEdgeChromaH (pfDeblocking, pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV],
434                           iLineSizeUV, uiBS[1][2]);
435   }
436 
437   if (* (uint32_t*)uiBS[1][3] != 0) {
438     FilteringEdgeLumaH (pfDeblocking, pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, uiBS[1][3]);
439   }
440 }
441 
FilteringEdgeLumaHV(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter)442 void FilteringEdgeLumaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
443   int32_t iLineSize  = pFilter->iCsStride[0];
444   int32_t iMbStride = pFilter->iMbStride;
445 
446   uint8_t*  pDestY;
447   int8_t   iCurQp;
448   int32_t  iIdexA, iAlpha, iBeta;
449 
450   int32_t iMbX = pCurMb->iMbX;
451   int32_t iMbY = pCurMb->iMbY;
452 
453   bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
454   bool bTopBsValid[2]  = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
455 
456   int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
457   int32_t iTopFlag  = bTopBsValid[pFilter->uiFilterIdc];
458 
459   ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
460   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
461 
462   pDestY  = pFilter->pCsData[0];
463   iCurQp  = pCurMb->uiLumaQp;
464 
465   * (uint32_t*)uiBSx4 = 0x03030303;
466 
467   // luma v
468   if (iLeftFlag) {
469     pFilter->uiLumaQP = (iCurQp + (pCurMb - 1)->uiLumaQp + 1) >> 1;
470     FilteringEdgeLumaIntraV (pfDeblocking, pFilter, pDestY, iLineSize, NULL);
471   }
472 
473   pFilter->uiLumaQP   = iCurQp;
474   GET_ALPHA_BETA_FROM_QP (pFilter->uiLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
475                           iBeta);
476   if (iAlpha | iBeta) {
477     TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 0);
478     pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
479     pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
480     pfDeblocking->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
481 
482   }
483 
484   // luma h
485   if (iTopFlag) {
486     pFilter->uiLumaQP   = (iCurQp   + (pCurMb - iMbStride)->uiLumaQp + 1) >> 1;
487     FilteringEdgeLumaIntraH (pfDeblocking, pFilter, pDestY, iLineSize, NULL);
488   }
489 
490   pFilter->uiLumaQP   = iCurQp;
491   if (iAlpha | iBeta) {
492     pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
493     pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
494     pfDeblocking->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
495   }
496 }
FilteringEdgeChromaHV(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter)497 void FilteringEdgeChromaHV (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
498   int32_t iLineSize  = pFilter->iCsStride[1];
499   int32_t iMbStride = pFilter->iMbStride;
500 
501   uint8_t*  pDestCb, *pDestCr;
502   int8_t   iCurQp;
503   int32_t  iIdexA, iAlpha, iBeta;
504 
505   int32_t iMbX = pCurMb->iMbX;
506   int32_t iMbY = pCurMb->iMbY;
507 
508   bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
509   bool bTopBsValid[2]  = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
510 
511   int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
512   int32_t iTopFlag  = bTopBsValid[pFilter->uiFilterIdc];
513 
514   ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
515   ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
516 
517   pDestCb = pFilter->pCsData[1];
518   pDestCr = pFilter->pCsData[2];
519   iCurQp  = pCurMb->uiChromaQp;
520   * (uint32_t*)uiBSx4 = 0x03030303;
521 
522   // chroma v
523   if (iLeftFlag) {
524     pFilter->uiChromaQP = (iCurQp + (pCurMb - 1)->uiChromaQp + 1) >> 1;
525     FilteringEdgeChromaIntraV (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSize, NULL);
526   }
527 
528   pFilter->uiChromaQP   = iCurQp;
529   GET_ALPHA_BETA_FROM_QP (pFilter->uiChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIdexA, iAlpha,
530                           iBeta);
531   if (iAlpha | iBeta) {
532     TC0_TBL_LOOKUP (iTc, iIdexA, uiBSx4, 1);
533     pfDeblocking->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc);
534   }
535 
536   // chroma h
537   if (iTopFlag) {
538     pFilter->uiChromaQP = (iCurQp + (pCurMb - iMbStride)->uiChromaQp + 1) >> 1;
539     FilteringEdgeChromaIntraH (pfDeblocking, pFilter, pDestCb, pDestCr, iLineSize, NULL);
540   }
541 
542   pFilter->uiChromaQP   = iCurQp;
543   if (iAlpha | iBeta) {
544     pfDeblocking->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize, iAlpha,
545                                             iBeta, iTc);
546   }
547 }
548 
549 // merge h&v lookup table operation to save performance
DeblockingIntraMb(DeblockingFunc * pfDeblocking,SMB * pCurMb,SDeblockingFilter * pFilter)550 void DeblockingIntraMb (DeblockingFunc* pfDeblocking, SMB* pCurMb, SDeblockingFilter* pFilter) {
551   FilteringEdgeLumaHV (pfDeblocking, pCurMb, pFilter);
552   FilteringEdgeChromaHV (pfDeblocking, pCurMb, pFilter);
553 }
554 
#if defined(HAVE_NEON) && defined(SINGLE_REF_FRAME)
/*!
 * \brief   Boundary-strength calculation using DeblockingBSCalcEnc_neon.
 *
 * After the NEON routine has filled uiBS, the two outer edges are patched:
 * an unusable neighbour forces the edge strengths to 0, and an intra neighbour
 * forces them to 4. pFunc and uiCurMbType are not used by this variant.
 */
void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                            int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  DeblockingBSCalcEnc_neon (pCurMb->pNonZeroCount, pCurMb->sMv,
                            (iLeftFlag ? LEFT_MB_POS : 0) | (iTopFlag ? TOP_MB_POS : 0), iMbStride, uiBS);

  // left macroblock edge
  if (!iLeftFlag) {
    * (uint32_t*)uiBS[0][0] = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    * (uint32_t*)uiBS[0][0] = 0x04040404;
  }

  // top macroblock edge
  if (!iTopFlag) {
    * (uint32_t*)uiBS[1][0] = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    * (uint32_t*)uiBS[1][0] = 0x04040404;
  }
}
#endif
576 
#if defined(HAVE_NEON_AARCH64) && defined(SINGLE_REF_FRAME)
/*!
 * \brief   Boundary-strength calculation using DeblockingBSCalcEnc_AArch64_neon.
 *
 * After the NEON routine has filled uiBS, the two outer edges are patched:
 * an unusable neighbour forces the edge strengths to 0, and an intra neighbour
 * forces them to 4. pFunc and uiCurMbType are not used by this variant.
 */
void DeblockingBSCalc_AArch64_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                                    int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  DeblockingBSCalcEnc_AArch64_neon (pCurMb->pNonZeroCount, pCurMb->sMv,
                                    (iLeftFlag ? LEFT_MB_POS : 0) | (iTopFlag ? TOP_MB_POS : 0), iMbStride, uiBS);

  // left macroblock edge
  if (!iLeftFlag) {
    * (uint32_t*)uiBS[0][0] = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    * (uint32_t*)uiBS[0][0] = 0x04040404;
  }

  // top macroblock edge
  if (!iTopFlag) {
    * (uint32_t*)uiBS[1][0] = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    * (uint32_t*)uiBS[1][0] = 0x04040404;
  }
}
#endif
598 
DeblockingBSCalc_c(SWelsFuncPtrList * pFunc,SMB * pCurMb,uint8_t uiBS[2][4][4],Mb_Type uiCurMbType,int32_t iMbStride,int32_t iLeftFlag,int32_t iTopFlag)599 void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
600                          int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
601   if (iLeftFlag) {
602     * (uint32_t*)uiBS[0][0] = IS_INTRA ((pCurMb - 1)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (pCurMb,
603                               pCurMb - 1, 0);
604   } else {
605     * (uint32_t*)uiBS[0][0] = 0;
606   }
607   if (iTopFlag) {
608     * (uint32_t*)uiBS[1][0] = IS_INTRA ((pCurMb - iMbStride)->uiMbType) ? 0x04040404 : DeblockingBSMarginalMBAvcbase (
609                                 pCurMb, (pCurMb - iMbStride), 1);
610   } else {
611     * (uint32_t*)uiBS[1][0] = 0;
612   }
613   //SKIP MB_16x16 or others
614   if (uiCurMbType != MB_TYPE_SKIP) {
615     pFunc->pfSetNZCZero (pCurMb->pNonZeroCount); // set all none-zero nzc to 1; dbk can be opti!
616 
617     if (uiCurMbType == MB_TYPE_16x16) {
618       DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1);
619     } else {
620       DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount);
621     }
622   } else {
623     * (uint32_t*)uiBS[0][1] = * (uint32_t*)uiBS[0][2] = * (uint32_t*)uiBS[0][3] =
624                                 * (uint32_t*)uiBS[1][1] = * (uint32_t*)uiBS[1][2] = * (uint32_t*)uiBS[1][3] = 0;
625   }
626 }
627 
DeblockingMbAvcbase(SWelsFuncPtrList * pFunc,SMB * pCurMb,SDeblockingFilter * pFilter)628 void DeblockingMbAvcbase (SWelsFuncPtrList* pFunc, SMB* pCurMb, SDeblockingFilter* pFilter) {
629   uint8_t uiBS[2][4][4] = {{{ 0 }}};
630 
631   Mb_Type uiCurMbType = pCurMb->uiMbType;
632   int32_t iMbStride  = pFilter->iMbStride;
633 
634   int32_t iMbX = pCurMb->iMbX;
635   int32_t iMbY = pCurMb->iMbY;
636 
637   bool bLeftBsValid[2] = { (iMbX > 0), ((iMbX > 0)&& (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc))};
638   bool bTopBsValid[2]  = { (iMbY > 0), ((iMbY > 0)&& (pCurMb->uiSliceIdc == (pCurMb - iMbStride)->uiSliceIdc))};
639 
640   int32_t iLeftFlag = bLeftBsValid[pFilter->uiFilterIdc];
641   int32_t iTopFlag  = bTopBsValid[pFilter->uiFilterIdc];
642 
643   switch (uiCurMbType) {
644   case MB_TYPE_INTRA4x4:
645   case MB_TYPE_INTRA16x16:
646   case MB_TYPE_INTRA_PCM:
647     DeblockingIntraMb (&pFunc->pfDeblocking, pCurMb, pFilter);
648     break;
649   default:
650     pFunc->pfDeblocking.pfDeblockingBSCalc (pFunc, pCurMb, uiBS, uiCurMbType, iMbStride, iLeftFlag, iTopFlag);
651     DeblockingInterMb (&pFunc->pfDeblocking, pCurMb, pFilter, uiBS);
652     break;
653   }
654 }
655 
DeblockingFilterFrameAvcbase(SDqLayer * pCurDq,SWelsFuncPtrList * pFunc)656 void  DeblockingFilterFrameAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc) {
657   int32_t i, j;
658   const int32_t kiMbWidth   = pCurDq->iMbWidth;
659   const int32_t kiMbHeight  = pCurDq->iMbHeight;
660   SMB* pCurrentMbBlock      = pCurDq->sMbDataP;
661   SSliceHeaderExt* sSliceHeaderExt = &pCurDq->ppSliceInLayer[0]->sSliceHeaderExt;
662   SDeblockingFilter pFilter;
663 
664   /* Step1: parameters set */
665   if (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc == 1)
666     return;
667 
668   pFilter.uiFilterIdc = (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc != 0);
669 
670   pFilter.iCsStride[0] = pCurDq->pDecPic->iLineSize[0];
671   pFilter.iCsStride[1] = pCurDq->pDecPic->iLineSize[1];
672   pFilter.iCsStride[2] = pCurDq->pDecPic->iLineSize[2];
673 
674   pFilter.iSliceAlphaC0Offset = sSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
675   pFilter.iSliceBetaOffset     = sSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
676 
677   pFilter.iMbStride = kiMbWidth;
678 
679   for (j = 0; j < kiMbHeight; ++j) {
680     pFilter.pCsData[0] = pCurDq->pDecPic->pData[0] + ((j * pFilter.iCsStride[0]) << 4);
681     pFilter.pCsData[1] = pCurDq->pDecPic->pData[1] + ((j * pFilter.iCsStride[1]) << 3);
682     pFilter.pCsData[2] = pCurDq->pDecPic->pData[2] + ((j * pFilter.iCsStride[2]) << 3);
683     for (i = 0; i < kiMbWidth; i++) {
684       DeblockingMbAvcbase (pFunc, pCurrentMbBlock, &pFilter);
685       ++pCurrentMbBlock;
686       pFilter.pCsData[0] += MB_WIDTH_LUMA;
687       pFilter.pCsData[1] += MB_WIDTH_CHROMA;
688       pFilter.pCsData[2] += MB_WIDTH_CHROMA;
689     }
690   }
691 }
692 
DeblockingFilterSliceAvcbase(SDqLayer * pCurDq,SWelsFuncPtrList * pFunc,SSlice * pSlice)693 void DeblockingFilterSliceAvcbase (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) {
694   SMB* pMbList                          = pCurDq->sMbDataP;
695   SSliceHeaderExt* sSliceHeaderExt      = &pSlice->sSliceHeaderExt;
696   SMB* pCurrentMbBlock;
697 
698   const int32_t kiMbWidth               = pCurDq->iMbWidth;
699   const int32_t kiMbHeight              = pCurDq->iMbHeight;
700   const int32_t kiTotalNumMb            = kiMbWidth * kiMbHeight;
701   int32_t iCurMbIdx = 0, iNextMbIdx = 0, iNumMbFiltered = 0;
702 
703   /* Step1: parameters set */
704   if (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc == 1)
705     return;
706 
707   SDeblockingFilter pFilter;
708 
709   pFilter.uiFilterIdc = (sSliceHeaderExt->sSliceHeader.uiDisableDeblockingFilterIdc != 0);
710   pFilter.iCsStride[0] = pCurDq->pDecPic->iLineSize[0];
711   pFilter.iCsStride[1] = pCurDq->pDecPic->iLineSize[1];
712   pFilter.iCsStride[2] = pCurDq->pDecPic->iLineSize[2];
713   pFilter.iSliceAlphaC0Offset = sSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
714   pFilter.iSliceBetaOffset    = sSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
715   pFilter.iMbStride           = kiMbWidth;
716 
717   iNextMbIdx  = sSliceHeaderExt->sSliceHeader.iFirstMbInSlice;
718 
719   for (; ;) {
720     iCurMbIdx       = iNextMbIdx;
721     pCurrentMbBlock = &pMbList[ iCurMbIdx ];
722 
723     pFilter.pCsData[0] = pCurDq->pDecPic->pData[0] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[0])
724                          << 4);
725     pFilter.pCsData[1] = pCurDq->pDecPic->pData[1] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[1])
726                          << 3);
727     pFilter.pCsData[2] = pCurDq->pDecPic->pData[2] + ((pCurrentMbBlock->iMbX + pCurrentMbBlock->iMbY * pFilter.iCsStride[2])
728                          << 3);
729 
730     DeblockingMbAvcbase (pFunc, pCurrentMbBlock, &pFilter);
731 
732     ++iNumMbFiltered;
733     iNextMbIdx = WelsGetNextMbOfSlice (pCurDq, iCurMbIdx);
734     //whether all of MB in current slice filtered or not
735     if (iNextMbIdx == -1 || iNextMbIdx >= kiTotalNumMb || iNumMbFiltered >= kiTotalNumMb) {
736       break;
737     }
738   }
739 }
740 
DeblockingFilterSliceAvcbaseNull(SDqLayer * pCurDq,SWelsFuncPtrList * pFunc,SSlice * pSlice)741 void DeblockingFilterSliceAvcbaseNull (SDqLayer* pCurDq, SWelsFuncPtrList* pFunc, SSlice* pSlice) {
742 }
743 
PerformDeblockingFilter(sWelsEncCtx * pEnc)744 void PerformDeblockingFilter (sWelsEncCtx* pEnc) {
745   SDqLayer* pCurLayer = pEnc->pCurDqLayer;
746   SSlice* pSlice      = NULL;
747 
748   if (pCurLayer->iLoopFilterDisableIdc == 0) {
749     DeblockingFilterFrameAvcbase (pCurLayer, pEnc->pFuncList);
750   } else if (pCurLayer->iLoopFilterDisableIdc == 2) {
751     int32_t iSliceCount = 0;
752     int32_t iSliceIdx   = 0;
753 
754     iSliceCount = GetCurrentSliceNum (pCurLayer);
755     do {
756       pSlice = pCurLayer->ppSliceInLayer[iSliceIdx];
757       assert (NULL != pSlice);
758       DeblockingFilterSliceAvcbase (pCurLayer, pEnc->pFuncList, pSlice);
759       ++ iSliceIdx;
760     } while (iSliceIdx < iSliceCount);
761   }
762 }
763 
WelsBlockFuncInit(PSetNoneZeroCountZeroFunc * pfSetNZCZero,int32_t iCpu)764 void WelsBlockFuncInit (PSetNoneZeroCountZeroFunc* pfSetNZCZero,  int32_t iCpu) {
765   *pfSetNZCZero = WelsNonZeroCount_c;
766 #ifdef HAVE_NEON
767   if (iCpu & WELS_CPU_NEON) {
768     *pfSetNZCZero = WelsNonZeroCount_neon;
769   }
770 #endif
771 #ifdef HAVE_NEON_AARCH64
772   if (iCpu & WELS_CPU_NEON) {
773     *pfSetNZCZero = WelsNonZeroCount_AArch64_neon;
774   }
775 #endif
776 #if defined(X86_ASM)
777   if (iCpu & WELS_CPU_SSE2) {
778     *pfSetNZCZero = WelsNonZeroCount_sse2;
779   }
780 #endif
781 #if defined(HAVE_MMI)
782   if (iCpu & WELS_CPU_MMI) {
783     *pfSetNZCZero = WelsNonZeroCount_mmi;
784   }
785 #endif
786 #if defined(HAVE_MSA)
787   if (iCpu & WELS_CPU_MSA) {
788     *pfSetNZCZero = WelsNonZeroCount_msa;
789   }
790 #endif
791 }
792 
DeblockingInit(DeblockingFunc * pFunc,int32_t iCpu)793 void  DeblockingInit (DeblockingFunc*   pFunc,  int32_t iCpu) {
794   pFunc->pfLumaDeblockingLT4Ver     = DeblockLumaLt4V_c;
795   pFunc->pfLumaDeblockingEQ4Ver     = DeblockLumaEq4V_c;
796   pFunc->pfLumaDeblockingLT4Hor     = DeblockLumaLt4H_c;
797   pFunc->pfLumaDeblockingEQ4Hor     = DeblockLumaEq4H_c;
798 
799   pFunc->pfChromaDeblockingLT4Ver   = DeblockChromaLt4V_c;
800   pFunc->pfChromaDeblockingEQ4Ver   = DeblockChromaEq4V_c;
801   pFunc->pfChromaDeblockingLT4Hor   = DeblockChromaLt4H_c;
802   pFunc->pfChromaDeblockingEQ4Hor   = DeblockChromaEq4H_c;
803 
804   pFunc->pfDeblockingBSCalc         = DeblockingBSCalc_c;
805 
806 
807 #ifdef X86_ASM
808   if (iCpu & WELS_CPU_SSSE3) {
809     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_ssse3;
810     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_ssse3;
811     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_ssse3;
812     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_ssse3;
813     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
814     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
815     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
816     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3;
817   }
818 #endif
819 
820 #if defined(HAVE_NEON)
821   if (iCpu & WELS_CPU_NEON) {
822     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_neon;
823     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_neon;
824     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_neon;
825     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_neon;
826 
827     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon;
828     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
829     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
830     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
831 
832 #if defined(SINGLE_REF_FRAME)
833     pFunc->pfDeblockingBSCalc       = DeblockingBSCalc_neon;
834 #endif
835   }
836 #endif
837 
838 #if defined(HAVE_NEON_AARCH64)
839   if (iCpu & WELS_CPU_NEON) {
840     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_AArch64_neon;
841     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_AArch64_neon;
842     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_AArch64_neon;
843     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_AArch64_neon;
844 
845     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
846     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
847     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
848     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
849 
850 #if defined(SINGLE_REF_FRAME)
851     pFunc->pfDeblockingBSCalc       = DeblockingBSCalc_AArch64_neon;
852 #endif
853   }
854 #endif
855 
856 #if defined(HAVE_MMI)
857   if (iCpu & WELS_CPU_MMI) {
858     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_mmi;
859     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_mmi;
860     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_mmi;
861     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_mmi;
862     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_mmi;
863     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_mmi;
864     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_mmi;
865     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_mmi;
866   }
867 #endif//HAVE_MMI
868 
869 #if defined(HAVE_MSA)
870   if (iCpu & WELS_CPU_MSA) {
871     pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_msa;
872     pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_msa;
873     pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_msa;
874     pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_msa;
875     pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_msa;
876     pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_msa;
877     pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_msa;
878     pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_msa;
879   }
880 #endif//HAVE_MSA
881 }
882 
883 
884 } // namespace WelsEnc
885 
886