1 /*****************************************************************************
2  *
3  *  XVID MPEG-4 VIDEO CODEC
4  *  - Sum Of Absolute Difference related code -
5  *
6  *  Copyright(C) 2001-2010 Peter Ross <pross@xvid.org>
7  *               2010      Michael Militzer <michael@xvid.org>
8  *
9  *  This program is free software ; you can redistribute it and/or modify
10  *  it under the terms of the GNU General Public License as published by
11  *  the Free Software Foundation ; either version 2 of the License, or
12  *  (at your option) any later version.
13  *
14  *  This program is distributed in the hope that it will be useful,
15  *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  *  GNU General Public License for more details.
18  *
19  *  You should have received a copy of the GNU General Public License
20  *  along with this program ; if not, write to the Free Software
21  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
22  *
23  * $Id: sad.c 1985 2011-05-18 09:02:35Z Isibaar $
24  *
25  ****************************************************************************/
26 
27 #include "../portab.h"
28 #include "../global.h"
29 #include "sad.h"
30 
31 #include <stdlib.h>
32 
33 sad16FuncPtr sad16;
34 sad8FuncPtr sad8;
35 sad16biFuncPtr sad16bi;
36 sad8biFuncPtr sad8bi;		/* not really sad16, but no difference in prototype */
37 dev16FuncPtr dev16;
38 sad16vFuncPtr sad16v;
39 sse8Func_16bitPtr sse8_16bit;
40 sse8Func_8bitPtr sse8_8bit;
41 
42 sseh8Func_16bitPtr sseh8_16bit;
43 coeff8_energyFunc_Ptr coeff8_energy;
44 blocksum8Func_Ptr blocksum8;
45 
46 sadInitFuncPtr sadInit;
47 
48 
49 uint32_t
sad16_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,const uint32_t best_sad)50 sad16_c(const uint8_t * const cur,
51 		const uint8_t * const ref,
52 		const uint32_t stride,
53 		const uint32_t best_sad)
54 {
55 
56 	uint32_t sad = 0;
57 	uint32_t j;
58 	uint8_t const *ptr_cur = cur;
59 	uint8_t const *ptr_ref = ref;
60 
61 	for (j = 0; j < 16; j++) {
62 			sad += abs(ptr_cur[0] - ptr_ref[0]);
63 			sad += abs(ptr_cur[1] - ptr_ref[1]);
64 			sad += abs(ptr_cur[2] - ptr_ref[2]);
65 			sad += abs(ptr_cur[3] - ptr_ref[3]);
66 			sad += abs(ptr_cur[4] - ptr_ref[4]);
67 			sad += abs(ptr_cur[5] - ptr_ref[5]);
68 			sad += abs(ptr_cur[6] - ptr_ref[6]);
69 			sad += abs(ptr_cur[7] - ptr_ref[7]);
70 			sad += abs(ptr_cur[8] - ptr_ref[8]);
71 			sad += abs(ptr_cur[9] - ptr_ref[9]);
72 			sad += abs(ptr_cur[10] - ptr_ref[10]);
73 			sad += abs(ptr_cur[11] - ptr_ref[11]);
74 			sad += abs(ptr_cur[12] - ptr_ref[12]);
75 			sad += abs(ptr_cur[13] - ptr_ref[13]);
76 			sad += abs(ptr_cur[14] - ptr_ref[14]);
77 			sad += abs(ptr_cur[15] - ptr_ref[15]);
78 
79 			if (sad >= best_sad)
80 				return sad;
81 
82 			ptr_cur += stride;
83 			ptr_ref += stride;
84 
85 	}
86 
87 	return sad;
88 
89 }
90 
91 uint32_t
sad16bi_c(const uint8_t * const cur,const uint8_t * const ref1,const uint8_t * const ref2,const uint32_t stride)92 sad16bi_c(const uint8_t * const cur,
93 		  const uint8_t * const ref1,
94 		  const uint8_t * const ref2,
95 		  const uint32_t stride)
96 {
97 
98 	uint32_t sad = 0;
99 	uint32_t i, j;
100 	uint8_t const *ptr_cur = cur;
101 	uint8_t const *ptr_ref1 = ref1;
102 	uint8_t const *ptr_ref2 = ref2;
103 
104 	for (j = 0; j < 16; j++) {
105 
106 		for (i = 0; i < 16; i++) {
107 			int pixel = (ptr_ref1[i] + ptr_ref2[i] + 1) / 2;
108 			sad += abs(ptr_cur[i] - pixel);
109 		}
110 
111 		ptr_cur += stride;
112 		ptr_ref1 += stride;
113 		ptr_ref2 += stride;
114 
115 	}
116 
117 	return sad;
118 
119 }
120 
121 uint32_t
sad8bi_c(const uint8_t * const cur,const uint8_t * const ref1,const uint8_t * const ref2,const uint32_t stride)122 sad8bi_c(const uint8_t * const cur,
123 		  const uint8_t * const ref1,
124 		  const uint8_t * const ref2,
125 		  const uint32_t stride)
126 {
127 
128 	uint32_t sad = 0;
129 	uint32_t i, j;
130 	uint8_t const *ptr_cur = cur;
131 	uint8_t const *ptr_ref1 = ref1;
132 	uint8_t const *ptr_ref2 = ref2;
133 
134 	for (j = 0; j < 8; j++) {
135 
136 		for (i = 0; i < 8; i++) {
137 			int pixel = (ptr_ref1[i] + ptr_ref2[i] + 1) / 2;
138 			sad += abs(ptr_cur[i] - pixel);
139 		}
140 
141 		ptr_cur += stride;
142 		ptr_ref1 += stride;
143 		ptr_ref2 += stride;
144 
145 	}
146 
147 	return sad;
148 
149 }
150 
151 
152 
153 uint32_t
sad8_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride)154 sad8_c(const uint8_t * const cur,
155 	   const uint8_t * const ref,
156 	   const uint32_t stride)
157 {
158 	uint32_t sad = 0;
159 	uint32_t j;
160 	uint8_t const *ptr_cur = cur;
161 	uint8_t const *ptr_ref = ref;
162 
163 	for (j = 0; j < 8; j++) {
164 
165 		sad += abs(ptr_cur[0] - ptr_ref[0]);
166 		sad += abs(ptr_cur[1] - ptr_ref[1]);
167 		sad += abs(ptr_cur[2] - ptr_ref[2]);
168 		sad += abs(ptr_cur[3] - ptr_ref[3]);
169 		sad += abs(ptr_cur[4] - ptr_ref[4]);
170 		sad += abs(ptr_cur[5] - ptr_ref[5]);
171 		sad += abs(ptr_cur[6] - ptr_ref[6]);
172 		sad += abs(ptr_cur[7] - ptr_ref[7]);
173 
174 		ptr_cur += stride;
175 		ptr_ref += stride;
176 
177 	}
178 
179 	return sad;
180 }
181 
182 
183 /* average deviation from mean */
184 
185 uint32_t
dev16_c(const uint8_t * const cur,const uint32_t stride)186 dev16_c(const uint8_t * const cur,
187 		const uint32_t stride)
188 {
189 
190 	uint32_t mean = 0;
191 	uint32_t dev = 0;
192 	uint32_t i, j;
193 	uint8_t const *ptr_cur = cur;
194 
195 	for (j = 0; j < 16; j++) {
196 
197 		for (i = 0; i < 16; i++)
198 			mean += *(ptr_cur + i);
199 
200 		ptr_cur += stride;
201 
202 	}
203 
204 	mean /= (16 * 16);
205 	ptr_cur = cur;
206 
207 	for (j = 0; j < 16; j++) {
208 
209 		for (i = 0; i < 16; i++)
210 			dev += abs(*(ptr_cur + i) - (int32_t) mean);
211 
212 		ptr_cur += stride;
213 
214 	}
215 
216 	return dev;
217 }
218 
sad16v_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,int32_t * sad)219 uint32_t sad16v_c(const uint8_t * const cur,
220 			   const uint8_t * const ref,
221 			   const uint32_t stride,
222 			   int32_t *sad)
223 {
224 	sad[0] = sad8(cur, ref, stride);
225 	sad[1] = sad8(cur + 8, ref + 8, stride);
226 	sad[2] = sad8(cur + 8*stride, ref + 8*stride, stride);
227 	sad[3] = sad8(cur + 8*stride + 8, ref + 8*stride + 8, stride);
228 
229 	return sad[0]+sad[1]+sad[2]+sad[3];
230 }
231 
sad32v_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,int32_t * sad)232 uint32_t sad32v_c(const uint8_t * const cur,
233 			   const uint8_t * const ref,
234 			   const uint32_t stride,
235 			   int32_t *sad)
236 {
237 	sad[0] = sad16(cur, ref, stride, 256*4096);
238 	sad[1] = sad16(cur + 16, ref + 16, stride, 256*4096);
239 	sad[2] = sad16(cur + 16*stride, ref + 16*stride, stride, 256*4096);
240 	sad[3] = sad16(cur + 16*stride + 16, ref + 16*stride + 16, stride, 256*4096);
241 
242 	return sad[0]+sad[1]+sad[2]+sad[3];
243 }
244 
245 
246 
247 #define MRSAD16_CORRFACTOR 8
248 uint32_t
mrsad16_c(const uint8_t * const cur,const uint8_t * const ref,const uint32_t stride,const uint32_t best_sad)249 mrsad16_c(const uint8_t * const cur,
250 		  const uint8_t * const ref,
251 		  const uint32_t stride,
252 		  const uint32_t best_sad)
253 {
254 
255 	uint32_t sad = 0;
256 	int32_t mean = 0;
257 	uint32_t i, j;
258 	uint8_t const *ptr_cur = cur;
259 	uint8_t const *ptr_ref = ref;
260 
261 	for (j = 0; j < 16; j++) {
262 		for (i = 0; i < 16; i++) {
263 			mean += ((int) *(ptr_cur + i) - (int) *(ptr_ref + i));
264 		}
265 		ptr_cur += stride;
266 		ptr_ref += stride;
267 
268 	}
269 	mean /= 256;
270 
271 	for (j = 0; j < 16; j++) {
272 
273 		ptr_cur -= stride;
274 		ptr_ref -= stride;
275 
276 		for (i = 0; i < 16; i++) {
277 
278 			sad += abs(*(ptr_cur + i) - *(ptr_ref + i) - mean);
279 			if (sad >= best_sad) {
280 				return MRSAD16_CORRFACTOR * sad;
281 			}
282 		}
283 	}
284 
285 	return MRSAD16_CORRFACTOR * sad;
286 }
287 
288 uint32_t
sse8_16bit_c(const int16_t * b1,const int16_t * b2,const uint32_t stride)289 sse8_16bit_c(const int16_t * b1,
290 			 const int16_t * b2,
291 			 const uint32_t stride)
292 {
293 	int i;
294 	int sse = 0;
295 
296 	for (i=0; i<8; i++) {
297 		sse += (b1[0] - b2[0])*(b1[0] - b2[0]);
298 		sse += (b1[1] - b2[1])*(b1[1] - b2[1]);
299 		sse += (b1[2] - b2[2])*(b1[2] - b2[2]);
300 		sse += (b1[3] - b2[3])*(b1[3] - b2[3]);
301 		sse += (b1[4] - b2[4])*(b1[4] - b2[4]);
302 		sse += (b1[5] - b2[5])*(b1[5] - b2[5]);
303 		sse += (b1[6] - b2[6])*(b1[6] - b2[6]);
304 		sse += (b1[7] - b2[7])*(b1[7] - b2[7]);
305 
306 		b1 = (const int16_t*)((int8_t*)b1+stride);
307 		b2 = (const int16_t*)((int8_t*)b2+stride);
308 	}
309 
310 	return(sse);
311 }
312 
313 uint32_t
sse8_8bit_c(const uint8_t * b1,const uint8_t * b2,const uint32_t stride)314 sse8_8bit_c(const uint8_t * b1,
315 			const uint8_t * b2,
316 			const uint32_t stride)
317 {
318 	int i;
319 	int sse = 0;
320 
321 	for (i=0; i<8; i++) {
322 		sse += (b1[0] - b2[0])*(b1[0] - b2[0]);
323 		sse += (b1[1] - b2[1])*(b1[1] - b2[1]);
324 		sse += (b1[2] - b2[2])*(b1[2] - b2[2]);
325 		sse += (b1[3] - b2[3])*(b1[3] - b2[3]);
326 		sse += (b1[4] - b2[4])*(b1[4] - b2[4]);
327 		sse += (b1[5] - b2[5])*(b1[5] - b2[5]);
328 		sse += (b1[6] - b2[6])*(b1[6] - b2[6]);
329 		sse += (b1[7] - b2[7])*(b1[7] - b2[7]);
330 
331 		b1 = b1+stride;
332 		b2 = b2+stride;
333 	}
334 
335 	return(sse);
336 }
337 
338 
339 /* PSNR-HVS-M helper functions */
340 
341 static const int16_t iMask_Coeff[64] = {
342         0, 29788, 32767, 20479, 13653, 8192, 6425, 5372,
343     27306, 27306, 23405, 17246, 12603, 5650, 5461, 5958,
344     23405, 25205, 20479, 13653,  8192, 5749, 4749, 5851,
345     23405, 19275, 14894, 11299,  6425, 3766, 4096, 5285,
346     18204, 14894,  8856,  5851,  4819, 3006, 3181, 4255,
347     13653,  9362,  5958,  5120,  4045, 3151, 2900, 3562,
348      6687,  5120,  4201,  3766,  3181, 2708, 2730, 3244,
349      4551,  3562,  3449,  3344,  2926, 3277, 3181, 3310
350 };
351 
352 /* Calculate CSF weighted energy of DCT coefficients */
353 
354 uint32_t
coeff8_energy_c(const int16_t * dct)355 coeff8_energy_c(const int16_t * dct)
356 {
357 	int x, y;
358 	uint32_t sum_a = 0;
359 
360 	for (y = 0; y < 8; y += 2) {
361 		for (x = 0; x < 8; x += 2) {
362 			int16_t a0 = ((dct[y*8+x]<<4) * iMask_Coeff[y*8+x]) >> 16;
363 			int16_t a1 = ((dct[y*8+x+1]<<4) * iMask_Coeff[y*8+x+1]) >> 16;
364 			int16_t a2 = ((dct[(y+1)*8+x]<<4) * iMask_Coeff[(y+1)*8+x]) >> 16;
365 			int16_t a3 = ((dct[(y+1)*8+x+1]<<4) * iMask_Coeff[(y+1)*8+x+1]) >> 16;
366 
367 			sum_a += ((a0*a0 + a1*a1 + a2*a2 + a3*a3) >> 3);
368 		}
369 	}
370 
371 	return sum_a;
372 }
373 
374 /* Calculate MSE of DCT coeffs reduced by masking effect */
375 
376 uint32_t
sseh8_16bit_c(const int16_t * cur,const int16_t * ref,uint16_t mask)377 sseh8_16bit_c(const int16_t * cur, const int16_t * ref, uint16_t mask)
378 {
379 	int j, i;
380 	uint32_t mse_h = 0;
381 
382 	for (j = 0; j < 8; j++) {
383 		for (i = 0; i < 8; i++) {
384 			uint32_t t = (mask * Inv_iMask_Coeff[j*8+i] + 32) >> 7;
385 			uint16_t u = abs(cur[j*8+i] - ref[j*8+i]) << 4;
386 			uint16_t thresh = (t < 65536) ? t : 65535;
387 
388 			if (u < thresh)
389 				u = 0; /* The error is not perceivable */
390 			else
391 				u -= thresh;
392 
393 			u = ((u + iCSF_Round[j*8 + i]) * iCSF_Coeff[j*8 + i]) >> 16;
394 
395 			mse_h += (uint32_t) (u * u);
396 		}
397 	}
398 
399 	return mse_h;
400 }
401 
402 /* Sums all pixels of 8x8 block */
403 
404 uint32_t
blocksum8_c(const uint8_t * cur,int stride,uint16_t sums[4],uint32_t squares[4])405 blocksum8_c(const uint8_t * cur, int stride, uint16_t sums[4], uint32_t squares[4])
406 {
407 	int i, j;
408 	uint32_t sum = 0;
409 
410 	sums[0] = sums[1] = sums[2] = sums[3] = 0;
411 	squares[0] = squares[1] = squares[2] = squares[3] = 0;
412 
413 	for (j = 0; j < 8; j++) {
414 		for (i = 0; i < 8; i++) {
415 			uint8_t p = cur[j*stride + i];
416 
417 			sums[(j>>2)*2 + (i>>2)] += p;
418 			squares[(j>>2)*2 + (i>>2)] += p*p;
419 
420 			sum += p;
421 		}
422 	}
423 
424 	return sum;
425 }
426