1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Author: Gopu Govindaswamy <gopu@multicorewareinc.com>
5 *         Min Chen <chenm003@163.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
20 *
21 * This program is also available under a commercial proprietary license.
22 * For more information, contact us at license @ x265.com.
23 *****************************************************************************/
24 
25 #include "common.h"
26 #include "deblock.h"
27 #include "framedata.h"
28 #include "picyuv.h"
29 #include "slice.h"
30 #include "mv.h"
31 
32 using namespace X265_NS;
33 
34 #define DEBLOCK_SMALLEST_BLOCK  8
35 #define DEFAULT_INTRA_TC_OFFSET 2
36 
deblockCTU(const CUData * ctu,const CUGeom & cuGeom,int32_t dir)37 void Deblock::deblockCTU(const CUData* ctu, const CUGeom& cuGeom, int32_t dir)
38 {
39     uint8_t blockStrength[MAX_NUM_PARTITIONS];
40 
41     memset(blockStrength, 0, sizeof(uint8_t) * cuGeom.numPartitions);
42 
43     deblockCU(ctu, cuGeom, dir, blockStrength);
44 }
45 
bsCuEdge(const CUData * cu,uint32_t absPartIdx,int32_t dir)46 static inline uint8_t bsCuEdge(const CUData* cu, uint32_t absPartIdx, int32_t dir)
47 {
48     if (dir == Deblock::EDGE_VER)
49     {
50         if (cu->m_cuPelX + g_zscanToPelX[absPartIdx] > 0)
51         {
52             uint32_t    tempPartIdx;
53             const CUData* tempCU = cu->getPULeft(tempPartIdx, absPartIdx);
54             return tempCU ? 2 : 0;
55         }
56     }
57     else
58     {
59         if (cu->m_cuPelY + g_zscanToPelY[absPartIdx] > 0)
60         {
61             uint32_t    tempPartIdx;
62             const CUData* tempCU = cu->getPUAbove(tempPartIdx, absPartIdx);
63             return tempCU ? 2 : 0;
64         }
65     }
66 
67     return 0;
68 }
69 
70 /* Deblocking filter process in CU-based (the same function as conventional's)
71  * param Edge the direction of the edge in block boundary (horizonta/vertical), which is added newly */
deblockCU(const CUData * cu,const CUGeom & cuGeom,const int32_t dir,uint8_t blockStrength[])72 void Deblock::deblockCU(const CUData* cu, const CUGeom& cuGeom, const int32_t dir, uint8_t blockStrength[])
73 {
74     uint32_t absPartIdx = cuGeom.absPartIdx;
75     uint32_t depth = cuGeom.depth;
76     if (cu->m_predMode[absPartIdx] == MODE_NONE)
77         return;
78 
79     if (cu->m_cuDepth[absPartIdx] > depth)
80     {
81         for (uint32_t subPartIdx = 0; subPartIdx < 4; subPartIdx++)
82         {
83             const CUGeom& childGeom = *(&cuGeom + cuGeom.childOffset + subPartIdx);
84             if (childGeom.flags & CUGeom::PRESENT)
85                 deblockCU(cu, childGeom, dir, blockStrength);
86         }
87         return;
88     }
89 
90     uint32_t numUnits = 1 << (cuGeom.log2CUSize - LOG2_UNIT_SIZE);
91     setEdgefilterPU(cu, absPartIdx, dir, blockStrength, numUnits);
92     setEdgefilterTU(cu, absPartIdx, 0, dir, blockStrength);
93     setEdgefilterMultiple(absPartIdx, dir, 0, bsCuEdge(cu, absPartIdx, dir), blockStrength, numUnits);
94 
95     uint32_t numParts = cuGeom.numPartitions;
96     for (uint32_t partIdx = absPartIdx; partIdx < absPartIdx + numParts; partIdx++)
97     {
98         uint32_t bsCheck = !(partIdx & (1 << dir));
99 
100         if (bsCheck && blockStrength[partIdx])
101             blockStrength[partIdx] = getBoundaryStrength(cu, dir, partIdx, blockStrength);
102     }
103 
104     const uint32_t partIdxIncr = DEBLOCK_SMALLEST_BLOCK >> LOG2_UNIT_SIZE;
105     uint32_t shiftFactor = (dir == EDGE_VER) ? cu->m_hChromaShift : cu->m_vChromaShift;
106     uint32_t chromaMask = ((DEBLOCK_SMALLEST_BLOCK << shiftFactor) >> LOG2_UNIT_SIZE) - 1;
107     uint32_t e0 = (dir == EDGE_VER ? g_zscanToPelX[absPartIdx] : g_zscanToPelY[absPartIdx]) >> LOG2_UNIT_SIZE;
108 
109     for (uint32_t e = 0; e < numUnits; e += partIdxIncr)
110     {
111         edgeFilterLuma(cu, absPartIdx, depth, dir, e, blockStrength);
112         if (!((e0 + e) & chromaMask) && cu->m_chromaFormat != X265_CSP_I400)
113             edgeFilterChroma(cu, absPartIdx, depth, dir, e, blockStrength);
114     }
115 }
116 
calcBsIdx(uint32_t absPartIdx,int32_t dir,int32_t edgeIdx,int32_t baseUnitIdx)117 static inline uint32_t calcBsIdx(uint32_t absPartIdx, int32_t dir, int32_t edgeIdx, int32_t baseUnitIdx)
118 {
119     if (dir)
120         return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (edgeIdx << LOG2_RASTER_SIZE) + baseUnitIdx];
121     else
122         return g_rasterToZscan[g_zscanToRaster[absPartIdx] + (baseUnitIdx << LOG2_RASTER_SIZE) + edgeIdx];
123 }
124 
setEdgefilterMultiple(uint32_t scanIdx,int32_t dir,int32_t edgeIdx,uint8_t value,uint8_t blockStrength[],uint32_t numUnits)125 void Deblock::setEdgefilterMultiple(uint32_t scanIdx, int32_t dir, int32_t edgeIdx, uint8_t value, uint8_t blockStrength[], uint32_t numUnits)
126 {
127     X265_CHECK(numUnits > 0, "numUnits edge filter check\n");
128     for (uint32_t i = 0; i < numUnits; i++)
129     {
130         const uint32_t bsidx = calcBsIdx(scanIdx, dir, edgeIdx, i);
131         blockStrength[bsidx] = value;
132     }
133 }
134 
setEdgefilterTU(const CUData * cu,uint32_t absPartIdx,uint32_t tuDepth,int32_t dir,uint8_t blockStrength[])135 void Deblock::setEdgefilterTU(const CUData* cu, uint32_t absPartIdx, uint32_t tuDepth, int32_t dir, uint8_t blockStrength[])
136 {
137     uint32_t log2TrSize = cu->m_log2CUSize[absPartIdx] - tuDepth;
138     if (cu->m_tuDepth[absPartIdx] > tuDepth)
139     {
140         uint32_t qNumParts = 1 << (log2TrSize - LOG2_UNIT_SIZE - 1) * 2;
141         for (uint32_t qIdx = 0; qIdx < 4; ++qIdx, absPartIdx += qNumParts)
142             setEdgefilterTU(cu, absPartIdx, tuDepth + 1, dir, blockStrength);
143         return;
144     }
145 
146     uint32_t numUnits = 1 << (log2TrSize - LOG2_UNIT_SIZE);
147     setEdgefilterMultiple(absPartIdx, dir, 0, 2, blockStrength, numUnits);
148 }
149 
setEdgefilterPU(const CUData * cu,uint32_t absPartIdx,int32_t dir,uint8_t blockStrength[],uint32_t numUnits)150 void Deblock::setEdgefilterPU(const CUData* cu, uint32_t absPartIdx, int32_t dir, uint8_t blockStrength[], uint32_t numUnits)
151 {
152     const uint32_t hNumUnits = numUnits >> 1;
153     const uint32_t qNumUnits = numUnits >> 2;
154 
155     switch (cu->m_partSize[absPartIdx])
156     {
157     case SIZE_2NxN:
158         if (EDGE_HOR == dir)
159             setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
160         break;
161     case SIZE_Nx2N:
162         if (EDGE_VER == dir)
163             setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
164         break;
165     case SIZE_NxN:
166         setEdgefilterMultiple(absPartIdx, dir, hNumUnits, 1, blockStrength, numUnits);
167         break;
168     case SIZE_2NxnU:
169         if (EDGE_HOR == dir)
170             setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
171         break;
172     case SIZE_nLx2N:
173         if (EDGE_VER == dir)
174             setEdgefilterMultiple(absPartIdx, dir, qNumUnits, 1, blockStrength, numUnits);
175         break;
176     case SIZE_2NxnD:
177         if (EDGE_HOR == dir)
178             setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
179         break;
180     case SIZE_nRx2N:
181         if (EDGE_VER == dir)
182             setEdgefilterMultiple(absPartIdx, dir, numUnits - qNumUnits, 1, blockStrength, numUnits);
183         break;
184 
185     case SIZE_2Nx2N:
186     default:
187         break;
188     }
189 }
190 
getBoundaryStrength(const CUData * cuQ,int32_t dir,uint32_t partQ,const uint8_t blockStrength[])191 uint8_t Deblock::getBoundaryStrength(const CUData* cuQ, int32_t dir, uint32_t partQ, const uint8_t blockStrength[])
192 {
193     // Calculate block index
194     uint32_t partP;
195     const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
196 
197     // Set BS for Intra MB : BS = 2
198     if (cuP->isIntra(partP) || cuQ->isIntra(partQ))
199         return 2;
200 
201     // Set BS for not Intra MB : BS = 1 or 0
202     if (blockStrength[partQ] > 1 &&
203         (cuQ->getCbf(partQ, TEXT_LUMA, cuQ->m_tuDepth[partQ]) ||
204          cuP->getCbf(partP, TEXT_LUMA, cuP->m_tuDepth[partP])))
205         return 1;
206 
207     static const MV zeroMv(0, 0);
208     const Slice* const sliceQ = cuQ->m_slice;
209     const Slice* const sliceP = cuP->m_slice;
210     const Frame* refP0 = (cuP->m_refIdx[0][partP] >= 0) ? sliceP->m_refFrameList[0][cuP->m_refIdx[0][partP]] : NULL;
211     const Frame* refQ0 = (cuQ->m_refIdx[0][partQ] >= 0) ? sliceQ->m_refFrameList[0][cuQ->m_refIdx[0][partQ]] : NULL;
212     const MV& mvP0 = refP0 ? cuP->m_mv[0][partP] : zeroMv;
213     const MV& mvQ0 = refQ0 ? cuQ->m_mv[0][partQ] : zeroMv;
214     if (sliceQ->isInterP() && sliceP->isInterP())
215     {
216         return ((refP0 != refQ0) ||
217                 (abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4)) ? 1 : 0;
218     }
219     // (sliceQ->isInterB() || sliceP->isInterB())
220     const Frame* refP1 = (cuP->m_refIdx[1][partP] >= 0) ? sliceP->m_refFrameList[1][cuP->m_refIdx[1][partP]] : NULL;
221     const Frame* refQ1 = (cuQ->m_refIdx[1][partQ] >= 0) ? sliceQ->m_refFrameList[1][cuQ->m_refIdx[1][partQ]] : NULL;
222     const MV& mvP1 = refP1 ? cuP->m_mv[1][partP] : zeroMv;
223     const MV& mvQ1 = refQ1 ? cuQ->m_mv[1][partQ] : zeroMv;
224 
225     if (((refP0 == refQ0) && (refP1 == refQ1)) || ((refP0 == refQ1) && (refP1 == refQ0)))
226     {
227         if (refP0 != refP1) // Different L0 & L1
228         {
229             if (refP0 == refQ0)
230                 return ((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
231                         (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) ? 1 : 0;
232             else
233                 return ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
234                         (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4)) ? 1 : 0;
235         }
236         else // Same L0 & L1
237         {
238             return (((abs(mvQ0.x - mvP0.x) >= 4) || (abs(mvQ0.y - mvP0.y) >= 4) ||
239                      (abs(mvQ1.x - mvP1.x) >= 4) || (abs(mvQ1.y - mvP1.y) >= 4)) &&
240                     ((abs(mvQ1.x - mvP0.x) >= 4) || (abs(mvQ1.y - mvP0.y) >= 4) ||
241                      (abs(mvQ0.x - mvP1.x) >= 4) || (abs(mvQ0.y - mvP1.y) >= 4))) ? 1 : 0;
242         }
243     }
244 
245     // for all different Ref_Idx
246     return 1;
247 }
248 
calcDP(pixel * src,intptr_t offset)249 static inline int32_t calcDP(pixel* src, intptr_t offset)
250 {
251     return abs(static_cast<int32_t>(src[-offset * 3]) - 2 * src[-offset * 2] + src[-offset]);
252 }
253 
calcDQ(pixel * src,intptr_t offset)254 static inline int32_t calcDQ(pixel* src, intptr_t offset)
255 {
256     return abs(static_cast<int32_t>(src[0]) - 2 * src[offset] + src[offset * 2]);
257 }
258 
/* Decide, for one line of samples across an edge, whether the strong luma
 * filter may be used: the outermost and innermost samples on each side must
 * be close to each other (sum of differences below beta / 8) and the step
 * across the edge must stay below (5 * tc + 1) / 2. */
static inline bool useStrongFiltering(intptr_t offset, int32_t beta, int32_t tc, pixel* src)
{
    const int16_t p3 = (int16_t)src[-offset * 4];
    const int16_t p0 = (int16_t)src[-offset];
    const int16_t q0 = (int16_t)src[0];
    const int16_t q3 = (int16_t)src[offset * 3];

    const int32_t sideDiff = abs(p3 - p0) + abs(q3 - q0);
    if (sideDiff >= (beta >> 3))
        return false;

    return abs(p0 - q0) < ((tc * 5 + 1) >> 1);
}
269 
270 /* Deblocking for the luminance component with strong or weak filter
271  * \param src     pointer to picture data
272  * \param offset  offset value for picture data
273  * \param tc      tc value
274  * \param maskP   indicator to enable filtering on partP
275  * \param maskQ   indicator to enable filtering on partQ
276  * \param maskP1  decision weak filter/no filter for partP
277  * \param maskQ1  decision weak filter/no filter for partQ */
pelFilterLuma(pixel * src,intptr_t srcStep,intptr_t offset,int32_t tc,int32_t maskP,int32_t maskQ,int32_t maskP1,int32_t maskQ1)278 static inline void pelFilterLuma(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ,
279                                  int32_t maskP1, int32_t maskQ1)
280 {
281     int32_t thrCut = tc * 10;
282     int32_t tc2 = tc >> 1;
283     maskP1 &= maskP;
284     maskQ1 &= maskQ;
285 
286     for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
287     {
288         int16_t m4  = (int16_t)src[0];
289         int16_t m3  = (int16_t)src[-offset];
290         int16_t m5  = (int16_t)src[offset];
291         int16_t m2  = (int16_t)src[-offset * 2];
292 
293         int32_t delta = (9 * (m4 - m3) - 3 * (m5 - m2) + 8) >> 4;
294 
295         if (abs(delta) < thrCut)
296         {
297             delta = x265_clip3(-tc, tc, delta);
298 
299             src[-offset] = x265_clip(m3 + (delta & maskP));
300             src[0] = x265_clip(m4 - (delta & maskQ));
301             if (maskP1)
302             {
303                 int16_t m1  = (int16_t)src[-offset * 3];
304                 int32_t delta1 = x265_clip3(-tc2, tc2, ((((m1 + m3 + 1) >> 1) - m2 + delta) >> 1));
305                 src[-offset * 2] = x265_clip(m2 + delta1);
306             }
307             if (maskQ1)
308             {
309                 int16_t m6  = (int16_t)src[offset * 2];
310                 int32_t delta2 = x265_clip3(-tc2, tc2, ((((m6 + m4 + 1) >> 1) - m5 - delta) >> 1));
311                 src[offset] = x265_clip(m5 + delta2);
312             }
313         }
314     }
315 }
316 
edgeFilterLuma(const CUData * cuQ,uint32_t absPartIdx,uint32_t depth,int32_t dir,int32_t edge,const uint8_t blockStrength[])317 void Deblock::edgeFilterLuma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
318 {
319     PicYuv* reconPic = cuQ->m_encData->m_reconPic;
320     pixel* src = reconPic->getLumaAddr(cuQ->m_cuAddr, absPartIdx);
321     intptr_t stride = reconPic->m_stride;
322     const PPS* pps = cuQ->m_slice->m_pps;
323 
324     intptr_t offset, srcStep;
325 
326     int32_t maskP = -1;
327     int32_t maskQ = -1;
328     int32_t betaOffset = pps->deblockingFilterBetaOffsetDiv2 << 1;
329     int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
330     bool bCheckNoFilter = pps->bTransquantBypassEnabled;
331 
332     if (dir == EDGE_VER)
333     {
334         offset = 1;
335         srcStep = stride;
336         src += (edge << LOG2_UNIT_SIZE);
337     }
338     else // (dir == EDGE_HOR)
339     {
340         offset = stride;
341         srcStep = 1;
342         src += (edge << LOG2_UNIT_SIZE) * stride;
343     }
344 
345     uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> depth;
346     for (uint32_t idx = 0; idx < numUnits; idx++)
347     {
348         uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx);
349         uint32_t bs = blockStrength[partQ];
350 
351         if (!bs)
352             continue;
353 
354         // Derive neighboring PU index
355         uint32_t partP;
356         const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
357 
358         if (bCheckNoFilter)
359         {
360             // check if each of PUs is lossless coded
361             maskP = cuP->m_tqBypass[partP] - 1;
362             maskQ = cuQ->m_tqBypass[partQ] - 1;
363             if (!(maskP | maskQ))
364                 continue;
365         }
366 
367         int32_t qpQ = cuQ->m_qp[partQ];
368         int32_t qpP = cuP->m_qp[partP];
369         int32_t qp  = (qpP + qpQ + 1) >> 1;
370 
371         int32_t indexB = x265_clip3(0, QP_MAX_SPEC, qp + betaOffset);
372 
373         const int32_t bitdepthShift = X265_DEPTH - 8;
374         int32_t beta = s_betaTable[indexB] << bitdepthShift;
375 
376         intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
377         int32_t dp0 = calcDP(src + unitOffset              , offset);
378         int32_t dq0 = calcDQ(src + unitOffset              , offset);
379         int32_t dp3 = calcDP(src + unitOffset + srcStep * 3, offset);
380         int32_t dq3 = calcDQ(src + unitOffset + srcStep * 3, offset);
381         int32_t d0 = dp0 + dq0;
382         int32_t d3 = dp3 + dq3;
383 
384         int32_t d =  d0 + d3;
385 
386         if (d >= beta)
387             continue;
388 
389         int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET * (bs - 1) + tcOffset));
390         int32_t tc = s_tcTable[indexTC] << bitdepthShift;
391 
392         bool sw = (2 * d0 < (beta >> 2) &&
393                    2 * d3 < (beta >> 2) &&
394                    useStrongFiltering(offset, beta, tc, src + unitOffset              ) &&
395                    useStrongFiltering(offset, beta, tc, src + unitOffset + srcStep * 3));
396 
397         if (sw)
398         {
399             int32_t tc2 = 2 * tc;
400             int32_t tcP = (tc2 & maskP);
401             int32_t tcQ = (tc2 & maskQ);
402             primitives.pelFilterLumaStrong[dir](src + unitOffset, srcStep, offset, tcP, tcQ);
403         }
404         else
405         {
406             int32_t sideThreshold = (beta + (beta >> 1)) >> 3;
407             int32_t dp = dp0 + dp3;
408             int32_t dq = dq0 + dq3;
409             int32_t maskP1 = (dp < sideThreshold ? -1 : 0);
410             int32_t maskQ1 = (dq < sideThreshold ? -1 : 0);
411 
412             pelFilterLuma(src + unitOffset, srcStep, offset, tc, maskP, maskQ, maskP1, maskQ1);
413         }
414     }
415 }
416 
edgeFilterChroma(const CUData * cuQ,uint32_t absPartIdx,uint32_t depth,int32_t dir,int32_t edge,const uint8_t blockStrength[])417 void Deblock::edgeFilterChroma(const CUData* cuQ, uint32_t absPartIdx, uint32_t depth, int32_t dir, int32_t edge, const uint8_t blockStrength[])
418 {
419     int32_t chFmt = cuQ->m_chromaFormat, chromaShift;
420     intptr_t offset, srcStep;
421     const PPS* pps = cuQ->m_slice->m_pps;
422 
423     int32_t maskP = -1;
424     int32_t maskQ = -1;
425     int32_t tcOffset = pps->deblockingFilterTcOffsetDiv2 << 1;
426 
427     X265_CHECK(((dir == EDGE_VER)
428                 ? ((g_zscanToPelX[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_hChromaShift)
429                 : ((g_zscanToPelY[absPartIdx] + edge * UNIT_SIZE) >> cuQ->m_vChromaShift)) % DEBLOCK_SMALLEST_BLOCK == 0,
430                "invalid edge\n");
431 
432     PicYuv* reconPic = cuQ->m_encData->m_reconPic;
433     intptr_t stride = reconPic->m_strideC;
434     intptr_t srcOffset = reconPic->getChromaAddrOffset(cuQ->m_cuAddr, absPartIdx);
435     bool bCheckNoFilter = pps->bTransquantBypassEnabled;
436 
437     if (dir == EDGE_VER)
438     {
439         chromaShift = cuQ->m_vChromaShift;
440         srcOffset += (edge << (LOG2_UNIT_SIZE - cuQ->m_hChromaShift));
441         offset     = 1;
442         srcStep    = stride;
443     }
444     else // (dir == EDGE_HOR)
445     {
446         chromaShift = cuQ->m_hChromaShift;
447         srcOffset += edge * stride << (LOG2_UNIT_SIZE - cuQ->m_vChromaShift);
448         offset     = stride;
449         srcStep    = 1;
450     }
451 
452     pixel* srcChroma[2];
453     srcChroma[0] = reconPic->m_picOrg[1] + srcOffset;
454     srcChroma[1] = reconPic->m_picOrg[2] + srcOffset;
455 
456     uint32_t numUnits = cuQ->m_slice->m_sps->numPartInCUSize >> (depth + chromaShift);
457     for (uint32_t idx = 0; idx < numUnits; idx++)
458     {
459         uint32_t partQ = calcBsIdx(absPartIdx, dir, edge, idx << chromaShift);
460         uint32_t bs = blockStrength[partQ];
461 
462         if (bs <= 1)
463             continue;
464 
465         // Derive neighboring PU index
466         uint32_t partP;
467         const CUData* cuP = (dir == EDGE_VER ? cuQ->getPULeft(partP, partQ) : cuQ->getPUAbove(partP, partQ));
468 
469         if (bCheckNoFilter)
470         {
471             // check if each of PUs is lossless coded
472             maskP = (cuP->m_tqBypass[partP] ? 0 : -1);
473             maskQ = (cuQ->m_tqBypass[partQ] ? 0 : -1);
474             if (!(maskP | maskQ))
475                 continue;
476         }
477 
478         int32_t qpQ = cuQ->m_qp[partQ];
479         int32_t qpP = cuP->m_qp[partP];
480         int32_t qpA = (qpP + qpQ + 1) >> 1;
481 
482         intptr_t unitOffset = idx * srcStep << LOG2_UNIT_SIZE;
483         for (uint32_t chromaIdx = 0; chromaIdx < 2; chromaIdx++)
484         {
485             int32_t qp = qpA + pps->chromaQpOffset[chromaIdx];
486             if (qp >= 30)
487                 qp = chFmt == X265_CSP_I420 ? g_chromaScale[qp] : X265_MIN(qp, QP_MAX_SPEC);
488 
489             int32_t indexTC = x265_clip3(0, QP_MAX_SPEC + DEFAULT_INTRA_TC_OFFSET, int32_t(qp + DEFAULT_INTRA_TC_OFFSET + tcOffset));
490             const int32_t bitdepthShift = X265_DEPTH - 8;
491             int32_t tc = s_tcTable[indexTC] << bitdepthShift;
492             pixel* srcC = srcChroma[chromaIdx];
493 
494             primitives.pelFilterChroma[dir](srcC + unitOffset, srcStep, offset, tc, maskP, maskQ);
495         }
496     }
497 }
498 
499 const uint8_t Deblock::s_tcTable[54] =
500 {
501     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,
502     2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24
503 };
504 
505 const uint8_t Deblock::s_betaTable[52] =
506 {
507     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
508     18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64
509 };
510 
511