1 /*****************************************************************************
2 *
3 * XVID MPEG-4 VIDEO CODEC
4 * - Sum Of Absolute Difference related code -
5 *
6 * Copyright(C) 2001-2010 Peter Ross <pross@xvid.org>
7 * 2010 Michael Militzer <michael@xvid.org>
8 *
9 * This program is free software ; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation ; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY ; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program ; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 *
23 * $Id: sad.c 1985 2011-05-18 09:02:35Z Isibaar $
24 *
25 ****************************************************************************/
26
27 #include "../portab.h"
28 #include "../global.h"
29 #include "sad.h"
30
31 #include <stdlib.h>
32
33 sad16FuncPtr sad16;
34 sad8FuncPtr sad8;
35 sad16biFuncPtr sad16bi;
36 sad8biFuncPtr sad8bi; /* not really sad16, but no difference in prototype */
37 dev16FuncPtr dev16;
38 sad16vFuncPtr sad16v;
39 sse8Func_16bitPtr sse8_16bit;
40 sse8Func_8bitPtr sse8_8bit;
41
42 sseh8Func_16bitPtr sseh8_16bit;
43 coeff8_energyFunc_Ptr coeff8_energy;
44 blocksum8Func_Ptr blocksum8;
45
46 sadInitFuncPtr sadInit;
47
48
49 uint32_t
sad16_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,const uint32_t best_sad)50 sad16_c(const uint8_t * const cur,
51 const uint8_t * const ref,
52 const uint32_t stride,
53 const uint32_t best_sad)
54 {
55
56 uint32_t sad = 0;
57 uint32_t j;
58 uint8_t const *ptr_cur = cur;
59 uint8_t const *ptr_ref = ref;
60
61 for (j = 0; j < 16; j++) {
62 sad += abs(ptr_cur[0] - ptr_ref[0]);
63 sad += abs(ptr_cur[1] - ptr_ref[1]);
64 sad += abs(ptr_cur[2] - ptr_ref[2]);
65 sad += abs(ptr_cur[3] - ptr_ref[3]);
66 sad += abs(ptr_cur[4] - ptr_ref[4]);
67 sad += abs(ptr_cur[5] - ptr_ref[5]);
68 sad += abs(ptr_cur[6] - ptr_ref[6]);
69 sad += abs(ptr_cur[7] - ptr_ref[7]);
70 sad += abs(ptr_cur[8] - ptr_ref[8]);
71 sad += abs(ptr_cur[9] - ptr_ref[9]);
72 sad += abs(ptr_cur[10] - ptr_ref[10]);
73 sad += abs(ptr_cur[11] - ptr_ref[11]);
74 sad += abs(ptr_cur[12] - ptr_ref[12]);
75 sad += abs(ptr_cur[13] - ptr_ref[13]);
76 sad += abs(ptr_cur[14] - ptr_ref[14]);
77 sad += abs(ptr_cur[15] - ptr_ref[15]);
78
79 if (sad >= best_sad)
80 return sad;
81
82 ptr_cur += stride;
83 ptr_ref += stride;
84
85 }
86
87 return sad;
88
89 }
90
91 uint32_t
sad16bi_c(const uint8_t * const cur,const uint8_t * const ref1,const uint8_t * const ref2,const uint32_t stride)92 sad16bi_c(const uint8_t * const cur,
93 const uint8_t * const ref1,
94 const uint8_t * const ref2,
95 const uint32_t stride)
96 {
97
98 uint32_t sad = 0;
99 uint32_t i, j;
100 uint8_t const *ptr_cur = cur;
101 uint8_t const *ptr_ref1 = ref1;
102 uint8_t const *ptr_ref2 = ref2;
103
104 for (j = 0; j < 16; j++) {
105
106 for (i = 0; i < 16; i++) {
107 int pixel = (ptr_ref1[i] + ptr_ref2[i] + 1) / 2;
108 sad += abs(ptr_cur[i] - pixel);
109 }
110
111 ptr_cur += stride;
112 ptr_ref1 += stride;
113 ptr_ref2 += stride;
114
115 }
116
117 return sad;
118
119 }
120
121 uint32_t
sad8bi_c(const uint8_t * const cur,const uint8_t * const ref1,const uint8_t * const ref2,const uint32_t stride)122 sad8bi_c(const uint8_t * const cur,
123 const uint8_t * const ref1,
124 const uint8_t * const ref2,
125 const uint32_t stride)
126 {
127
128 uint32_t sad = 0;
129 uint32_t i, j;
130 uint8_t const *ptr_cur = cur;
131 uint8_t const *ptr_ref1 = ref1;
132 uint8_t const *ptr_ref2 = ref2;
133
134 for (j = 0; j < 8; j++) {
135
136 for (i = 0; i < 8; i++) {
137 int pixel = (ptr_ref1[i] + ptr_ref2[i] + 1) / 2;
138 sad += abs(ptr_cur[i] - pixel);
139 }
140
141 ptr_cur += stride;
142 ptr_ref1 += stride;
143 ptr_ref2 += stride;
144
145 }
146
147 return sad;
148
149 }
150
151
152
153 uint32_t
sad8_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride)154 sad8_c(const uint8_t * const cur,
155 const uint8_t * const ref,
156 const uint32_t stride)
157 {
158 uint32_t sad = 0;
159 uint32_t j;
160 uint8_t const *ptr_cur = cur;
161 uint8_t const *ptr_ref = ref;
162
163 for (j = 0; j < 8; j++) {
164
165 sad += abs(ptr_cur[0] - ptr_ref[0]);
166 sad += abs(ptr_cur[1] - ptr_ref[1]);
167 sad += abs(ptr_cur[2] - ptr_ref[2]);
168 sad += abs(ptr_cur[3] - ptr_ref[3]);
169 sad += abs(ptr_cur[4] - ptr_ref[4]);
170 sad += abs(ptr_cur[5] - ptr_ref[5]);
171 sad += abs(ptr_cur[6] - ptr_ref[6]);
172 sad += abs(ptr_cur[7] - ptr_ref[7]);
173
174 ptr_cur += stride;
175 ptr_ref += stride;
176
177 }
178
179 return sad;
180 }
181
182
183 /* average deviation from mean */
184
185 uint32_t
dev16_c(const uint8_t * const cur,const uint32_t stride)186 dev16_c(const uint8_t * const cur,
187 const uint32_t stride)
188 {
189
190 uint32_t mean = 0;
191 uint32_t dev = 0;
192 uint32_t i, j;
193 uint8_t const *ptr_cur = cur;
194
195 for (j = 0; j < 16; j++) {
196
197 for (i = 0; i < 16; i++)
198 mean += *(ptr_cur + i);
199
200 ptr_cur += stride;
201
202 }
203
204 mean /= (16 * 16);
205 ptr_cur = cur;
206
207 for (j = 0; j < 16; j++) {
208
209 for (i = 0; i < 16; i++)
210 dev += abs(*(ptr_cur + i) - (int32_t) mean);
211
212 ptr_cur += stride;
213
214 }
215
216 return dev;
217 }
218
sad16v_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,int32_t * sad)219 uint32_t sad16v_c(const uint8_t * const cur,
220 const uint8_t * const ref,
221 const uint32_t stride,
222 int32_t *sad)
223 {
224 sad[0] = sad8(cur, ref, stride);
225 sad[1] = sad8(cur + 8, ref + 8, stride);
226 sad[2] = sad8(cur + 8*stride, ref + 8*stride, stride);
227 sad[3] = sad8(cur + 8*stride + 8, ref + 8*stride + 8, stride);
228
229 return sad[0]+sad[1]+sad[2]+sad[3];
230 }
231
sad32v_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,int32_t * sad)232 uint32_t sad32v_c(const uint8_t * const cur,
233 const uint8_t * const ref,
234 const uint32_t stride,
235 int32_t *sad)
236 {
237 sad[0] = sad16(cur, ref, stride, 256*4096);
238 sad[1] = sad16(cur + 16, ref + 16, stride, 256*4096);
239 sad[2] = sad16(cur + 16*stride, ref + 16*stride, stride, 256*4096);
240 sad[3] = sad16(cur + 16*stride + 16, ref + 16*stride + 16, stride, 256*4096);
241
242 return sad[0]+sad[1]+sad[2]+sad[3];
243 }
244
245
246
247 #define MRSAD16_CORRFACTOR 8
248 uint32_t
mrsad16_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,const uint32_t best_sad)249 mrsad16_c(const uint8_t * const cur,
250 const uint8_t * const ref,
251 const uint32_t stride,
252 const uint32_t best_sad)
253 {
254
255 uint32_t sad = 0;
256 int32_t mean = 0;
257 uint32_t i, j;
258 uint8_t const *ptr_cur = cur;
259 uint8_t const *ptr_ref = ref;
260
261 for (j = 0; j < 16; j++) {
262 for (i = 0; i < 16; i++) {
263 mean += ((int) *(ptr_cur + i) - (int) *(ptr_ref + i));
264 }
265 ptr_cur += stride;
266 ptr_ref += stride;
267
268 }
269 mean /= 256;
270
271 for (j = 0; j < 16; j++) {
272
273 ptr_cur -= stride;
274 ptr_ref -= stride;
275
276 for (i = 0; i < 16; i++) {
277
278 sad += abs(*(ptr_cur + i) - *(ptr_ref + i) - mean);
279 if (sad >= best_sad) {
280 return MRSAD16_CORRFACTOR * sad;
281 }
282 }
283 }
284
285 return MRSAD16_CORRFACTOR * sad;
286 }
287
288 uint32_t
sse8_16bit_c(const int16_t * b1,const int16_t * b2,const uint32_t stride)289 sse8_16bit_c(const int16_t * b1,
290 const int16_t * b2,
291 const uint32_t stride)
292 {
293 int i;
294 int sse = 0;
295
296 for (i=0; i<8; i++) {
297 sse += (b1[0] - b2[0])*(b1[0] - b2[0]);
298 sse += (b1[1] - b2[1])*(b1[1] - b2[1]);
299 sse += (b1[2] - b2[2])*(b1[2] - b2[2]);
300 sse += (b1[3] - b2[3])*(b1[3] - b2[3]);
301 sse += (b1[4] - b2[4])*(b1[4] - b2[4]);
302 sse += (b1[5] - b2[5])*(b1[5] - b2[5]);
303 sse += (b1[6] - b2[6])*(b1[6] - b2[6]);
304 sse += (b1[7] - b2[7])*(b1[7] - b2[7]);
305
306 b1 = (const int16_t*)((int8_t*)b1+stride);
307 b2 = (const int16_t*)((int8_t*)b2+stride);
308 }
309
310 return(sse);
311 }
312
313 uint32_t
sse8_8bit_c(const uint8_t * b1,const uint8_t * b2,const uint32_t stride)314 sse8_8bit_c(const uint8_t * b1,
315 const uint8_t * b2,
316 const uint32_t stride)
317 {
318 int i;
319 int sse = 0;
320
321 for (i=0; i<8; i++) {
322 sse += (b1[0] - b2[0])*(b1[0] - b2[0]);
323 sse += (b1[1] - b2[1])*(b1[1] - b2[1]);
324 sse += (b1[2] - b2[2])*(b1[2] - b2[2]);
325 sse += (b1[3] - b2[3])*(b1[3] - b2[3]);
326 sse += (b1[4] - b2[4])*(b1[4] - b2[4]);
327 sse += (b1[5] - b2[5])*(b1[5] - b2[5]);
328 sse += (b1[6] - b2[6])*(b1[6] - b2[6]);
329 sse += (b1[7] - b2[7])*(b1[7] - b2[7]);
330
331 b1 = b1+stride;
332 b2 = b2+stride;
333 }
334
335 return(sse);
336 }
337
338
339 /* PSNR-HVS-M helper functions */
340
341 static const int16_t iMask_Coeff[64] = {
342 0, 29788, 32767, 20479, 13653, 8192, 6425, 5372,
343 27306, 27306, 23405, 17246, 12603, 5650, 5461, 5958,
344 23405, 25205, 20479, 13653, 8192, 5749, 4749, 5851,
345 23405, 19275, 14894, 11299, 6425, 3766, 4096, 5285,
346 18204, 14894, 8856, 5851, 4819, 3006, 3181, 4255,
347 13653, 9362, 5958, 5120, 4045, 3151, 2900, 3562,
348 6687, 5120, 4201, 3766, 3181, 2708, 2730, 3244,
349 4551, 3562, 3449, 3344, 2926, 3277, 3181, 3310
350 };
351
352 /* Calculate CSF weighted energy of DCT coefficients */
353
354 uint32_t
coeff8_energy_c(const int16_t * dct)355 coeff8_energy_c(const int16_t * dct)
356 {
357 int x, y;
358 uint32_t sum_a = 0;
359
360 for (y = 0; y < 8; y += 2) {
361 for (x = 0; x < 8; x += 2) {
362 int16_t a0 = ((dct[y*8+x]<<4) * iMask_Coeff[y*8+x]) >> 16;
363 int16_t a1 = ((dct[y*8+x+1]<<4) * iMask_Coeff[y*8+x+1]) >> 16;
364 int16_t a2 = ((dct[(y+1)*8+x]<<4) * iMask_Coeff[(y+1)*8+x]) >> 16;
365 int16_t a3 = ((dct[(y+1)*8+x+1]<<4) * iMask_Coeff[(y+1)*8+x+1]) >> 16;
366
367 sum_a += ((a0*a0 + a1*a1 + a2*a2 + a3*a3) >> 3);
368 }
369 }
370
371 return sum_a;
372 }
373
374 /* Calculate MSE of DCT coeffs reduced by masking effect */
375
376 uint32_t
sseh8_16bit_c(const int16_t * cur,const int16_t * ref,uint16_t mask)377 sseh8_16bit_c(const int16_t * cur, const int16_t * ref, uint16_t mask)
378 {
379 int j, i;
380 uint32_t mse_h = 0;
381
382 for (j = 0; j < 8; j++) {
383 for (i = 0; i < 8; i++) {
384 uint32_t t = (mask * Inv_iMask_Coeff[j*8+i] + 32) >> 7;
385 uint16_t u = abs(cur[j*8+i] - ref[j*8+i]) << 4;
386 uint16_t thresh = (t < 65536) ? t : 65535;
387
388 if (u < thresh)
389 u = 0; /* The error is not perceivable */
390 else
391 u -= thresh;
392
393 u = ((u + iCSF_Round[j*8 + i]) * iCSF_Coeff[j*8 + i]) >> 16;
394
395 mse_h += (uint32_t) (u * u);
396 }
397 }
398
399 return mse_h;
400 }
401
402 /* Sums all pixels of 8x8 block */
403
404 uint32_t
blocksum8_c(const uint8_t * cur,int stride,uint16_t sums[4],uint32_t squares[4])405 blocksum8_c(const uint8_t * cur, int stride, uint16_t sums[4], uint32_t squares[4])
406 {
407 int i, j;
408 uint32_t sum = 0;
409
410 sums[0] = sums[1] = sums[2] = sums[3] = 0;
411 squares[0] = squares[1] = squares[2] = squares[3] = 0;
412
413 for (j = 0; j < 8; j++) {
414 for (i = 0; i < 8; i++) {
415 uint8_t p = cur[j*stride + i];
416
417 sums[(j>>2)*2 + (i>>2)] += p;
418 squares[(j>>2)*2 + (i>>2)] += p*p;
419
420 sum += p;
421 }
422 }
423
424 return sum;
425 }
426