1 /*****************************************************************************
2 * Copyright (C) 2013-2020 MulticoreWare, Inc
3 *
4 * Authors: Praveen Kumar Tiwari <praveen@multicorewareinc.com>
5 *          Dnyaneshwar Gorade <dnyaneshwar@multicorewareinc.com>
6 *          Min Chen <chenm003@163.com>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111, USA.
21 *
22 * This program is also available under a commercial proprietary license.
23 * For more information, contact us at license @ x265.com.
24 *****************************************************************************/
25 
26 #include "common.h"
27 #include "primitives.h"
28 
29 #define PIXEL_MIN 0
30 
31 namespace {
32 
/* Returns the sign of x: -1 when x is negative, 0 when x is zero and +1 when
 * x is positive (TODO: this is a dup, make common).
 *
 * The result is built from two comparisons, so it does not depend on the
 * width of int, on how a negative value is right-shifted, or on negating x
 * (which would overflow for INT_MIN). */
static inline int8_t signOf(int x)
{
    return (int8_t)((x > 0) - (x < 0));
}
38 
calSign(int8_t * dst,const pixel * src1,const pixel * src2,const int endX)39 static void calSign(int8_t *dst, const pixel *src1, const pixel *src2, const int endX)
40 {
41     for (int x = 0; x < endX; x++)
42         dst[x] = signOf(src1[x] - src2[x]);
43 }
44 
processSaoCUE0(pixel * rec,int8_t * offsetEo,int width,int8_t * signLeft,intptr_t stride)45 static void processSaoCUE0(pixel * rec, int8_t * offsetEo, int width, int8_t* signLeft, intptr_t stride)
46 {
47     int x, y;
48     int8_t signRight, signLeft0;
49     int8_t edgeType;
50 
51     for (y = 0; y < 2; y++)
52     {
53         signLeft0 = signLeft[y];
54         for (x = 0; x < width; x++)
55         {
56             signRight = ((rec[x] - rec[x + 1]) < 0) ? -1 : ((rec[x] - rec[x + 1]) > 0) ? 1 : 0;
57             edgeType = signRight + signLeft0 + 2;
58             signLeft0 = -signRight;
59             rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
60         }
61         rec += stride;
62     }
63 }
64 
processSaoCUE1(pixel * rec,int8_t * upBuff1,int8_t * offsetEo,intptr_t stride,int width)65 static void processSaoCUE1(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
66 {
67     int x;
68     int8_t signDown;
69     int edgeType;
70 
71     for (x = 0; x < width; x++)
72     {
73         signDown = signOf(rec[x] - rec[x + stride]);
74         edgeType = signDown + upBuff1[x] + 2;
75         upBuff1[x] = -signDown;
76         rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
77     }
78 }
79 
processSaoCUE1_2Rows(pixel * rec,int8_t * upBuff1,int8_t * offsetEo,intptr_t stride,int width)80 static void processSaoCUE1_2Rows(pixel* rec, int8_t* upBuff1, int8_t* offsetEo, intptr_t stride, int width)
81 {
82     int x, y;
83     int8_t signDown;
84     int edgeType;
85 
86     for (y = 0; y < 2; y++)
87     {
88         for (x = 0; x < width; x++)
89         {
90             signDown = signOf(rec[x] - rec[x + stride]);
91             edgeType = signDown + upBuff1[x] + 2;
92             upBuff1[x] = -signDown;
93             rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
94         }
95         rec += stride;
96     }
97 }
98 
processSaoCUE2(pixel * rec,int8_t * bufft,int8_t * buff1,int8_t * offsetEo,int width,intptr_t stride)99 static void processSaoCUE2(pixel * rec, int8_t * bufft, int8_t * buff1, int8_t * offsetEo, int width, intptr_t stride)
100 {
101     int x;
102     for (x = 0; x < width; x++)
103     {
104         int8_t signDown = signOf(rec[x] - rec[x + stride + 1]);
105         int edgeType = signDown + buff1[x] + 2;
106         bufft[x + 1] = -signDown;
107         rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);;
108     }
109 }
110 
processSaoCUE3(pixel * rec,int8_t * upBuff1,int8_t * offsetEo,intptr_t stride,int startX,int endX)111 static void processSaoCUE3(pixel *rec, int8_t *upBuff1, int8_t *offsetEo, intptr_t stride, int startX, int endX)
112 {
113     int8_t signDown;
114     int8_t edgeType;
115 
116     for (int x = startX + 1; x < endX; x++)
117     {
118         signDown = signOf(rec[x] - rec[x + stride]);
119         edgeType = signDown + upBuff1[x] + 2;
120         upBuff1[x - 1] = -signDown;
121         rec[x] = x265_clip(rec[x] + offsetEo[edgeType]);
122     }
123 }
124 
processSaoCUB0(pixel * rec,const int8_t * offset,int ctuWidth,int ctuHeight,intptr_t stride)125 static void processSaoCUB0(pixel* rec, const int8_t* offset, int ctuWidth, int ctuHeight, intptr_t stride)
126 {
127     #define SAO_BO_BITS 5
128     const int boShift = X265_DEPTH - SAO_BO_BITS;
129     int x, y;
130     for (y = 0; y < ctuHeight; y++)
131     {
132         for (x = 0; x < ctuWidth; x++)
133         {
134             rec[x] = x265_clip(rec[x] + offset[rec[x] >> boShift]);
135         }
136         rec += stride;
137     }
138 }
139 
pelFilterLumaStrong_c(pixel * src,intptr_t srcStep,intptr_t offset,int32_t tcP,int32_t tcQ)140 static void pelFilterLumaStrong_c(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tcP, int32_t tcQ)
141 {
142     for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
143     {
144         int16_t m4  = (int16_t)src[0];
145         int16_t m3  = (int16_t)src[-offset];
146         int16_t m5  = (int16_t)src[offset];
147         int16_t m2  = (int16_t)src[-offset * 2];
148         int16_t m6  = (int16_t)src[offset * 2];
149         int16_t m1  = (int16_t)src[-offset * 3];
150         int16_t m7  = (int16_t)src[offset * 3];
151         int16_t m0  = (int16_t)src[-offset * 4];
152         src[-offset * 3] = (pixel)(x265_clip3(-tcP, tcP, ((2 * m0 + 3 * m1 + m2 + m3 + m4 + 4) >> 3) - m1) + m1);
153         src[-offset * 2] = (pixel)(x265_clip3(-tcP, tcP, ((m1 + m2 + m3 + m4 + 2) >> 2) - m2) + m2);
154         src[-offset]     = (pixel)(x265_clip3(-tcP, tcP, ((m1 + 2 * m2 + 2 * m3 + 2 * m4 + m5 + 4) >> 3) - m3) + m3);
155         src[0]           = (pixel)(x265_clip3(-tcQ, tcQ, ((m2 + 2 * m3 + 2 * m4 + 2 * m5 + m6 + 4) >> 3) - m4) + m4);
156         src[offset]      = (pixel)(x265_clip3(-tcQ, tcQ, ((m3 + m4 + m5 + m6 + 2) >> 2) - m5) + m5);
157         src[offset * 2]  = (pixel)(x265_clip3(-tcQ, tcQ, ((m3 + m4 + m5 + 3 * m6 + 2 * m7 + 4) >> 3) - m6) + m6);
158     }
159 }
160 
161 /* Deblocking of one line/column for the chrominance component
162 * \param src     pointer to picture data
163 * \param offset  offset value for picture data
164 * \param tc      tc value
165 * \param maskP   indicator to disable filtering on partP
166 * \param maskQ   indicator to disable filtering on partQ */
pelFilterChroma_c(pixel * src,intptr_t srcStep,intptr_t offset,int32_t tc,int32_t maskP,int32_t maskQ)167 static void pelFilterChroma_c(pixel* src, intptr_t srcStep, intptr_t offset, int32_t tc, int32_t maskP, int32_t maskQ)
168 {
169     for (int32_t i = 0; i < UNIT_SIZE; i++, src += srcStep)
170     {
171         int16_t m4 = (int16_t)src[0];
172         int16_t m3 = (int16_t)src[-offset];
173         int16_t m5 = (int16_t)src[offset];
174         int16_t m2 = (int16_t)src[-offset * 2];
175 
176         int32_t delta = x265_clip3(-tc, tc, ((((m4 - m3) * 4) + m2 - m5 + 4) >> 3));
177         src[-offset]  = x265_clip(m3 + (delta & maskP));
178         src[0]        = x265_clip(m4 - (delta & maskQ));
179     }
180 }
181 }
182 
183 namespace X265_NS {
setupLoopFilterPrimitives_c(EncoderPrimitives & p)184 void setupLoopFilterPrimitives_c(EncoderPrimitives &p)
185 {
186     p.saoCuOrgE0 = processSaoCUE0;
187     p.saoCuOrgE1 = processSaoCUE1;
188     p.saoCuOrgE1_2Rows = processSaoCUE1_2Rows;
189     p.saoCuOrgE2[0] = processSaoCUE2;
190     p.saoCuOrgE2[1] = processSaoCUE2;
191     p.saoCuOrgE3[0] = processSaoCUE3;
192     p.saoCuOrgE3[1] = processSaoCUE3;
193     p.saoCuOrgB0 = processSaoCUB0;
194     p.sign = calSign;
195 
196     // C code is same for EDGE_VER and EDGE_HOR only asm code is different
197     p.pelFilterLumaStrong[0] = pelFilterLumaStrong_c;
198     p.pelFilterLumaStrong[1] = pelFilterLumaStrong_c;
199     p.pelFilterChroma[0]     = pelFilterChroma_c;
200     p.pelFilterChroma[1]     = pelFilterChroma_c;
201 }
202 }
203