1 /**************************************************************************
2  *
3  *  XVID MPEG-4 VIDEO CODEC
4  *  - Image management functions -
5  *
6  *  Copyright(C) 2001-2010 Peter Ross <pross@xvid.org>
7  *
8  *  This program is free software ; you can redistribute it and/or modify
9  *  it under the terms of the GNU General Public License as published by
10  *  the Free Software Foundation ; either version 2 of the License, or
11  *  (at your option) any later version.
12  *
13  *  This program is distributed in the hope that it will be useful,
14  *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  *  GNU General Public License for more details.
17  *
18  *  You should have received a copy of the GNU General Public License
19  *  along with this program ; if not, write to the Free Software
20  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
21  *
22  * $Id: image.c 2172 2019-01-17 14:24:50Z Isibaar $
23  *
24  ****************************************************************************/
25 
26 #include <stdlib.h>
27 #include <string.h>				/* memcpy, memset */
28 #include <math.h>
29 #include "../portab.h"
30 #include "../global.h"			/* XVID_CSP_XXX's */
31 #include "../xvid.h"			/* XVID_CSP_XXX's */
32 #include "image.h"
33 #include "colorspace.h"
34 #include "interpolate8x8.h"
35 #include "../utils/mem_align.h"
36 #include "../motion/sad.h"
37 #include "../utils/emms.h"
38 
39 #include "font.h"		/* XXX: remove later */
40 
41 #define SAFETY	64
42 #define EDGE_SIZE2  (EDGE_SIZE/2)
43 
44 
45 int32_t
image_create(IMAGE * image,uint32_t edged_width,uint32_t edged_height)46 image_create(IMAGE * image,
47 			 uint32_t edged_width,
48 			 uint32_t edged_height)
49 {
50 	const uint32_t edged_width2 = edged_width / 2;
51 	const uint32_t edged_height2 = edged_height / 2;
52 
53 	image->y =
54 		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
55 	if (image->y == NULL) {
56 		return -1;
57 	}
58 	memset(image->y, 0, edged_width * (edged_height + 1) + SAFETY);
59 
60 	image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
61 	if (image->u == NULL) {
62 		xvid_free(image->y);
63 		image->y = NULL;
64 		return -1;
65 	}
66 	memset(image->u, 0, edged_width2 * edged_height2 + SAFETY);
67 
68 	image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
69 	if (image->v == NULL) {
70 		xvid_free(image->u);
71 		image->u = NULL;
72 		xvid_free(image->y);
73 		image->y = NULL;
74 		return -1;
75 	}
76 	memset(image->v, 0, edged_width2 * edged_height2 + SAFETY);
77 
78 	image->y += EDGE_SIZE * edged_width + EDGE_SIZE;
79 	image->u += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
80 	image->v += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
81 
82 	return 0;
83 }
84 
85 
86 
87 void
image_destroy(IMAGE * image,uint32_t edged_width,uint32_t edged_height)88 image_destroy(IMAGE * image,
89 			  uint32_t edged_width,
90 			  uint32_t edged_height)
91 {
92 	const uint32_t edged_width2 = edged_width / 2;
93 
94 	if (image->y) {
95 		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
96 		image->y = NULL;
97 	}
98 	if (image->u) {
99 		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
100 		image->u = NULL;
101 	}
102 	if (image->v) {
103 		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
104 		image->v = NULL;
105 	}
106 }
107 
108 
109 void
image_swap(IMAGE * image1,IMAGE * image2)110 image_swap(IMAGE * image1,
111 		   IMAGE * image2)
112 {
113     SWAP(uint8_t*, image1->y, image2->y);
114     SWAP(uint8_t*, image1->u, image2->u);
115     SWAP(uint8_t*, image1->v, image2->v);
116 }
117 
118 
119 void
image_copy(IMAGE * image1,IMAGE * image2,uint32_t edged_width,uint32_t height)120 image_copy(IMAGE * image1,
121 		   IMAGE * image2,
122 		   uint32_t edged_width,
123 		   uint32_t height)
124 {
125 	memcpy(image1->y, image2->y, edged_width * height);
126 	memcpy(image1->u, image2->u, edged_width * height / 4);
127 	memcpy(image1->v, image2->v, edged_width * height / 4);
128 }
129 
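/* the setedges bug affected bs_version values below SETEDGES_BUG_BEFORE, and
 * again from SETEDGES_BUG_AFTER up to (not including) SETEDGES_BUG_REFIXED */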
131 #define SETEDGES_BUG_BEFORE		18
132 #define SETEDGES_BUG_AFTER		57
133 #define SETEDGES_BUG_REFIXED		63
134 
135 void
image_setedges(IMAGE * image,uint32_t edged_width,uint32_t edged_height,uint32_t width,uint32_t height,int bs_version)136 image_setedges(IMAGE * image,
137 			   uint32_t edged_width,
138 			   uint32_t edged_height,
139 			   uint32_t width,
140 			   uint32_t height,
141 			   int bs_version)
142 {
143 	const uint32_t edged_width2 = edged_width / 2;
144 	uint32_t width2;
145 	uint32_t i;
146 	uint8_t *dst;
147 	uint8_t *src;
148 
149 	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
150 	src = image->y;
151 
152 	/* According to the Standard Clause 7.6.4, padding is done starting at 16
153 	 * pixel width and height multiples. This was not respected in old xvids */
154 	if ((bs_version >= SETEDGES_BUG_BEFORE &&
155 		bs_version <  SETEDGES_BUG_AFTER) ||
156 		bs_version >= SETEDGES_BUG_REFIXED) {
157 		width  = (width+15)&~15;
158 		height = (height+15)&~15;
159 	}
160 
161 	width2 = width/2;
162 
163 	for (i = 0; i < EDGE_SIZE; i++) {
164 		memset(dst, *src, EDGE_SIZE);
165 		memcpy(dst + EDGE_SIZE, src, width);
166 		memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
167 			   EDGE_SIZE);
168 		dst += edged_width;
169 	}
170 
171 	for (i = 0; i < height; i++) {
172 		memset(dst, *src, EDGE_SIZE);
173 		memset(dst + edged_width - EDGE_SIZE, src[width - 1], EDGE_SIZE);
174 		dst += edged_width;
175 		src += edged_width;
176 	}
177 
178 	src -= edged_width;
179 	for (i = 0; i < EDGE_SIZE; i++) {
180 		memset(dst, *src, EDGE_SIZE);
181 		memcpy(dst + EDGE_SIZE, src, width);
182 		memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
183 				   EDGE_SIZE);
184 		dst += edged_width;
185 	}
186 
187 
188 	/* U */
189 	dst = image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
190 	src = image->u;
191 
192 	for (i = 0; i < EDGE_SIZE2; i++) {
193 		memset(dst, *src, EDGE_SIZE2);
194 		memcpy(dst + EDGE_SIZE2, src, width2);
195 		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
196 			   EDGE_SIZE2);
197 		dst += edged_width2;
198 	}
199 
200 	for (i = 0; i < height / 2; i++) {
201 		memset(dst, *src, EDGE_SIZE2);
202 		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
203 		dst += edged_width2;
204 		src += edged_width2;
205 	}
206 	src -= edged_width2;
207 	for (i = 0; i < EDGE_SIZE2; i++) {
208 		memset(dst, *src, EDGE_SIZE2);
209 		memcpy(dst + EDGE_SIZE2, src, width2);
210 		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
211 			   EDGE_SIZE2);
212 		dst += edged_width2;
213 	}
214 
215 
216 	/* V */
217 	dst = image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
218 	src = image->v;
219 
220 	for (i = 0; i < EDGE_SIZE2; i++) {
221 		memset(dst, *src, EDGE_SIZE2);
222 		memcpy(dst + EDGE_SIZE2, src, width2);
223 		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
224 			   EDGE_SIZE2);
225 		dst += edged_width2;
226 	}
227 
228 	for (i = 0; i < height / 2; i++) {
229 		memset(dst, *src, EDGE_SIZE2);
230 		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
231 		dst += edged_width2;
232 		src += edged_width2;
233 	}
234 	src -= edged_width2;
235 	for (i = 0; i < EDGE_SIZE2; i++) {
236 		memset(dst, *src, EDGE_SIZE2);
237 		memcpy(dst + EDGE_SIZE2, src, width2);
238 		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
239 			   EDGE_SIZE2);
240 		dst += edged_width2;
241 	}
242 }
243 
/*
 * Build the interpolated planes refh, refv and refhv from the plane refn,
 * one 8x8 block at a time.
 *
 * refn               : source plane (only read through the interpolate8x8_*
 *                      helpers)
 * refh, refv, refhv  : destination planes
 * edged_width        : row stride shared by all four planes
 * edged_height       : plane height including edges
 * quarterpel         : non-zero selects the 6-tap lowpass filters, zero the
 *                      halfpel filters
 * rounding           : passed through unchanged to the interpolate8x8_* calls
 *
 * Processing starts EDGE_SIZE2 rows above and EDGE_SIZE2 columns left of the
 * given pointers and covers (edged_width - EDGE_SIZE) columns by
 * (edged_height - EDGE_SIZE) rows in steps of 8.
 */
void
image_interpolate(const uint8_t * refn,
				  uint8_t * refh,
				  uint8_t * refv,
				  uint8_t * refhv,
				  uint32_t edged_width,
				  uint32_t edged_height,
				  uint32_t quarterpel,
				  uint32_t rounding)
{
	const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */
	/* after a block row the pointers are advanced by this many bytes in
	 * addition to the per-block steps and the EDGE_SIZE skip; presumably this
	 * totals 8 rows when (edged_width - EDGE_SIZE) is a multiple of 8 */
	const uint32_t stride_add = 7 * edged_width;

	uint8_t *n_ptr;
	uint8_t *h_ptr, *v_ptr, *hv_ptr;
	uint32_t x, y;

	n_ptr = (uint8_t*)refn;
	h_ptr = refh;
	v_ptr = refv;

	/* move the start position up/left into the edge area */
	n_ptr -= offset;
	h_ptr -= offset;
	v_ptr -= offset;

	/* Note we initialize the hv pointer later, as we can optimize code a bit
	 * doing it down to up in quarterpel and up to down in halfpel */
	if(quarterpel) {

		/* first pass, top to bottom: h and v planes are filtered from n */
		for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
			for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) {
				interpolate8x8_6tap_lowpass_h(h_ptr, n_ptr, edged_width, rounding);
				interpolate8x8_6tap_lowpass_v(v_ptr, n_ptr, edged_width, rounding);

				n_ptr += 8;
				h_ptr += 8;
				v_ptr += 8;
			}

			n_ptr += EDGE_SIZE;
			h_ptr += EDGE_SIZE;
			v_ptr += EDGE_SIZE;

			h_ptr += stride_add;
			v_ptr += stride_add;
			n_ptr += stride_add;
		}

		/* second pass, bottom to top: the hv plane is the vertical filter
		 * applied to the h plane produced above (not to n) */
		h_ptr = refh + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;
		hv_ptr = refhv + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;

		for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) {
			hv_ptr -= stride_add;
			h_ptr -= stride_add;
			hv_ptr -= EDGE_SIZE;
			h_ptr -= EDGE_SIZE;

			/* walk the block row right to left; pointers are decremented
			 * before use */
			for (x = 0; x < (edged_width - EDGE_SIZE); x = x + 8) {
				hv_ptr -= 8;
				h_ptr -= 8;
				interpolate8x8_6tap_lowpass_v(hv_ptr, h_ptr, edged_width, rounding);
			}
		}
	} else {

		hv_ptr = refhv;
		hv_ptr -= offset;

		/* single pass, top to bottom: h, v and hv are all filtered from n */
		for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
			for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) {
				interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width, rounding);
				interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width, rounding);
				interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width, rounding);

				n_ptr += 8;
				h_ptr += 8;
				v_ptr += 8;
				hv_ptr += 8;
			}

			h_ptr += EDGE_SIZE;
			v_ptr += EDGE_SIZE;
			hv_ptr += EDGE_SIZE;
			n_ptr += EDGE_SIZE;

			h_ptr += stride_add;
			v_ptr += stride_add;
			hv_ptr += stride_add;
			n_ptr += stride_add;
		}
	}
}
336 
337 
/*
chroma optimize filter, invented by mf
a chroma pixel is replaced by the average of its four neighbouring chroma
pixels when the corresponding luma pixels are pure black or white.
*/
343 
344 void
image_chroma_optimize(IMAGE * img,int width,int height,int edged_width)345 image_chroma_optimize(IMAGE * img, int width, int height, int edged_width)
346 {
347 	int x,y;
348 	int pixels = 0;
349 
350 	for (y = 1; y < height/2 - 1; y++)
351 	for (x = 1; x < width/2 - 1; x++)
352 	{
353 #define IS_PURE(a)  ((a)<=16||(a)>=235)
354 #define IMG_Y(Y,X)	img->y[(Y)*edged_width + (X)]
355 #define IMG_U(Y,X)	img->u[(Y)*edged_width/2 + (X)]
356 #define IMG_V(Y,X)	img->v[(Y)*edged_width/2 + (X)]
357 
358 		if (IS_PURE(IMG_Y(y*2  ,x*2  )) &&
359 			IS_PURE(IMG_Y(y*2  ,x*2+1)) &&
360 			IS_PURE(IMG_Y(y*2+1,x*2  )) &&
361 			IS_PURE(IMG_Y(y*2+1,x*2+1)))
362 		{
363 			IMG_U(y,x) = (IMG_U(y,x-1) + IMG_U(y-1, x) + IMG_U(y, x+1) + IMG_U(y+1, x)) / 4;
364 			IMG_V(y,x) = (IMG_V(y,x-1) + IMG_V(y-1, x) + IMG_V(y, x+1) + IMG_V(y+1, x)) / 4;
365 			pixels++;
366 		}
367 
368 #undef IS_PURE
369 #undef IMG_Y
370 #undef IMG_U
371 #undef IMG_V
372 	}
373 
374 	DPRINTF(XVID_DEBUG_DEBUG,"chroma_optimized_pixels = %i/%i\n", pixels, width*height/4);
375 }
376 
377 
378 
379 
380 
381 /*
382   perform safe packed colorspace conversion, by splitting
383   the image up into an optimized area (pixel width divisible by 16),
384   and two unoptimized/plain-c areas (pixel width divisible by 2)
385 */
386 
387 static void
safe_packed_conv(uint8_t * x_ptr,int x_stride,uint8_t * y_ptr,uint8_t * u_ptr,uint8_t * v_ptr,int y_stride,int uv_stride,int width,int height,int vflip,packedFunc * func_opt,packedFunc func_c,int size,int interlacing)388 safe_packed_conv(uint8_t * x_ptr, int x_stride,
389 				 uint8_t * y_ptr, uint8_t * u_ptr, uint8_t * v_ptr,
390 				 int y_stride, int uv_stride,
391 				 int width, int height, int vflip,
392 				 packedFunc * func_opt, packedFunc func_c,
393                  int size, int interlacing)
394 {
395 	int width_opt, width_c, height_opt;
396 
397     if (width<0 || width==1 || height==1) return; /* forget about it */
398 
399 	if (func_opt != func_c && x_stride < size*((width+15)/16)*16)
400 	{
401 		width_opt = width & (~15);
402 		width_c = (width - width_opt) & (~1);
403 	}
404 	else if (func_opt != func_c && !(width&1) && (size==3))
405 	{
406         /* MMX reads 4 bytes per pixel for RGB/BGR */
407         width_opt = width - 2;
408         width_c = 2;
409     }
410     else {
411         /* Enforce the width to be divisable by two. */
412 		width_opt = width & (~1);
413 		width_c = 0;
414 	}
415 
416     /* packed conversions require height to be divisable by 2
417        (or even by 4 for interlaced conversion) */
418        if (interlacing)
419                height_opt = height & (~3);
420        else
421                height_opt = height & (~1);
422 
423 	func_opt(x_ptr, x_stride,
424 			y_ptr, u_ptr, v_ptr, y_stride, uv_stride,
425 			width_opt, height_opt, vflip);
426 
427 	if (width_c)
428 	{
429 		func_c(x_ptr + size*width_opt, x_stride,
430 			y_ptr + width_opt, u_ptr + width_opt/2, v_ptr + width_opt/2,
431 			y_stride, uv_stride, width_c, height_opt, vflip);
432 	}
433 }
434 
435 
436 
/*
 * Convert a caller-supplied picture into the internal planar image and pad
 * it to a multiple of 16 pixels in both directions.
 *
 * image        : destination image (plane pointers address the first pixel)
 * width/height : picture size in pixels
 * edged_width  : luma stride of the destination (chroma uses edged_width/2)
 * src          : source plane pointers; packed formats, I420 and YV12 use
 *                only src[0], XVID_CSP_PLANAR uses src[0..2]
 * src_stride   : source strides; XVID_CSP_PLANAR uses src_stride[0] for luma
 *                and src_stride[1] for both chroma planes
 * csp          : XVID_CSP_xxx colorspace, optionally or'ed with
 *                XVID_CSP_VFLIP
 * interlacing  : non-zero selects the interlaced converters for packed
 *                formats
 *
 * Returns 0 on success, -1 for an unsupported colorspace (in which case no
 * padding is performed).
 */
int
image_input(IMAGE * image,
			uint32_t width,
			int height,
			uint32_t edged_width,
			uint8_t * src[4],
			int src_stride[4],
			int csp,
			int interlacing)
{
	const int edged_width2 = edged_width/2;
	const int width2 = width/2;
	const int height2 = height/2;
#if 0
	const int height_signed = (csp & XVID_CSP_VFLIP) ? -height : height;
#endif

	/* the flip flag is stripped for the dispatch and passed to the
	 * converters as a separate argument */
	switch (csp & ~XVID_CSP_VFLIP) {
	case XVID_CSP_RGB555:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?rgb555i_to_yv12  :rgb555_to_yv12,
			interlacing?rgb555i_to_yv12_c:rgb555_to_yv12_c, 2, interlacing);
		break;

	case XVID_CSP_RGB565:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?rgb565i_to_yv12  :rgb565_to_yv12,
			interlacing?rgb565i_to_yv12_c:rgb565_to_yv12_c, 2, interlacing);
		break;


	case XVID_CSP_BGR:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?bgri_to_yv12  :bgr_to_yv12,
			interlacing?bgri_to_yv12_c:bgr_to_yv12_c, 3, interlacing);
		break;

	case XVID_CSP_BGRA:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?bgrai_to_yv12  :bgra_to_yv12,
			interlacing?bgrai_to_yv12_c:bgra_to_yv12_c, 4, interlacing);
		break;

	case XVID_CSP_ABGR :
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?abgri_to_yv12  :abgr_to_yv12,
			interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4, interlacing);
		break;

	case XVID_CSP_RGB:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?rgbi_to_yv12  :rgb_to_yv12,
			interlacing?rgbi_to_yv12_c:rgb_to_yv12_c, 3, interlacing);
		break;

	case XVID_CSP_RGBA :
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?rgbai_to_yv12  :rgba_to_yv12,
			interlacing?rgbai_to_yv12_c:rgba_to_yv12_c, 4, interlacing);
		break;

	case XVID_CSP_ARGB:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?argbi_to_yv12  : argb_to_yv12,
			interlacing?argbi_to_yv12_c: argb_to_yv12_c, 4, interlacing);
		break;

	case XVID_CSP_YUY2:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yuyvi_to_yv12  :yuyv_to_yv12,
			interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing);
		break;

	/* same converter as YUY2; the swap is achieved by exchanging the
	 * destination chroma planes */
	case XVID_CSP_YVYU:		/* u/v swapped */
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->v, image->u,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yuyvi_to_yv12  :yuyv_to_yv12,
			interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing);
		break;

	case XVID_CSP_UYVY:
		safe_packed_conv(
			src[0], src_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?uyvyi_to_yv12  :uyvy_to_yv12,
			interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2, interlacing);
		break;

	/* I420/YV12: the three planes are taken to lie back to back in src[0],
	 * with chroma stride src_stride[0]/2 and height2 chroma rows */
	case XVID_CSP_I420:	/* YCbCr == YUV == internal colorspace for MPEG */
		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
		break;

	case XVID_CSP_YV12: /* YCrCb == YVU == U and V plane swapped */
		yv12_to_yv12(image->y, image->v, image->u, edged_width, edged_width2,
			src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2,
			src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP));
		break;

	case XVID_CSP_PLANAR:  /* YCbCr with arbitrary pointers and different strides for Y and UV */
		yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2,
			src[0], src[1], src[2], src_stride[0], src_stride[1],  /* v: dst_stride[2] not yet supported */
			width, height, (csp & XVID_CSP_VFLIP));
		break;

	/* no conversion; the padding below still runs on the current contents */
	case XVID_CSP_NULL:
		break;

	default :
		return -1;
	}


	/* pad out image when the width and/or height is not a multiple of 16 */

	if (width & 15)
	{
		int i;
		int pad_width = 16 - (width&15);
		/* extend each row to the right by repeating its last pixel */
		for (i = 0; i < height; i++)
		{
			memset(image->y + i*edged_width + width,
				 *(image->y + i*edged_width + width - 1), pad_width);
		}
		for (i = 0; i < height/2; i++)
		{
			memset(image->u + i*edged_width2 + width2,
				 *(image->u + i*edged_width2 + width2 - 1),pad_width/2);
			memset(image->v + i*edged_width2 + width2,
				 *(image->v + i*edged_width2 + width2 - 1),pad_width/2);
		}
	}

	if (height & 15)
	{
		int pad_height = 16 - (height&15);
		/* row length rounded up to a multiple of 16, so the horizontal
		 * padding written above is copied as well */
		int length = ((width+15)/16)*16;
		int i;
		/* extend downwards by repeating the last row */
		for (i = 0; i < pad_height; i++)
		{
			memcpy(image->y + (height+i)*edged_width,
				   image->y + (height-1)*edged_width,length);
		}

		for (i = 0; i < pad_height/2; i++)
		{
			memcpy(image->u + (height2+i)*edged_width2,
				   image->u + (height2-1)*edged_width2,length/2);
			memcpy(image->v + (height2+i)*edged_width2,
				   image->v + (height2-1)*edged_width2,length/2);
		}
	}

/*
	if (interlacing)
		image_printf(image, edged_width, height, 5,5, "[i]");
	image_dump_yuvpgm(image, edged_width, ((width+15)/16)*16, ((height+15)/16)*16, "\\encode.pgm");
*/
	return 0;
}
617 
618 
619 
/*
 * Convert the internal planar image into the caller's colorspace.
 *
 * image        : source image (plane pointers address the first pixel)
 * width/height : picture size in pixels
 * edged_width  : luma stride of the source (chroma uses edged_width/2)
 * dst          : destination plane pointers; packed formats, I420 and YV12
 *                use only dst[0], XVID_CSP_PLANAR uses dst[0..2]; for
 *                XVID_CSP_INTERNAL the entries are overwritten with the
 *                image's own plane pointers
 * dst_stride   : destination strides; overwritten for XVID_CSP_INTERNAL
 * csp          : XVID_CSP_xxx colorspace, optionally or'ed with
 *                XVID_CSP_VFLIP
 * interlacing  : non-zero selects the interlaced converters for packed
 *                formats
 *
 * Returns 0 on success, -1 for an unsupported colorspace.
 */
int
image_output(IMAGE * image,
			 uint32_t width,
			 int height,
			 uint32_t edged_width,
			 uint8_t * dst[4],
			 int dst_stride[4],
			 int csp,
			 int interlacing)
{
	const int edged_width2 = edged_width/2;
	int height2 = height/2;

/*
	if (interlacing)
		image_printf(image, edged_width, height, 5,100, "[i]=%i,%i",width,height);
	image_dump_yuvpgm(image, edged_width, width, height, "\\decode.pgm");
*/

	/* the flip flag is stripped for the dispatch and passed to the
	 * converters as a separate argument */
	switch (csp & ~XVID_CSP_VFLIP) {
	case XVID_CSP_RGB555:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_rgb555i  :yv12_to_rgb555,
			interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2, interlacing);
		return 0;

	case XVID_CSP_RGB565:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_rgb565i  :yv12_to_rgb565,
			interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2, interlacing);
		return 0;

    case XVID_CSP_BGR:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_bgri  :yv12_to_bgr,
			interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3, interlacing);
		return 0;

	case XVID_CSP_BGRA:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_bgrai  :yv12_to_bgra,
			interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4, interlacing);
		return 0;

	case XVID_CSP_ABGR:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_abgri  :yv12_to_abgr,
			interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4, interlacing);
		return 0;

	case XVID_CSP_RGB:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_rgbi  :yv12_to_rgb,
			interlacing?yv12_to_rgbi_c:yv12_to_rgb_c, 3, interlacing);
		return 0;

	case XVID_CSP_RGBA:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_rgbai  :yv12_to_rgba,
			interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4, interlacing);
		return 0;

	case XVID_CSP_ARGB:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_argbi  :yv12_to_argb,
			interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4, interlacing);
		return 0;

	case XVID_CSP_YUY2:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
		return 0;

	/* same converter as YUY2; the swap is achieved by exchanging the
	 * source chroma planes */
	case XVID_CSP_YVYU:		/* u,v swapped */
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->v, image->u,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
		return 0;

	case XVID_CSP_UYVY:
		safe_packed_conv(
			dst[0], dst_stride[0], image->y, image->u, image->v,
			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
			interlacing?yv12_to_uyvyi  :yv12_to_uyvy,
			interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2, interlacing);
		return 0;

	/* I420/YV12: the three planes are written back to back into dst[0],
	 * with chroma stride dst_stride[0]/2 and height2 chroma rows */
	case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */
		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
			dst_stride[0], dst_stride[0]/2,
			image->y, image->u, image->v, edged_width, edged_width2,
			width, height, (csp & XVID_CSP_VFLIP));
		return 0;

	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
			dst_stride[0], dst_stride[0]/2,
			image->y, image->v, image->u, edged_width, edged_width2,
			width, height, (csp & XVID_CSP_VFLIP));
		return 0;

	case XVID_CSP_PLANAR:  /* YCbCr with arbitrary pointers and different strides for Y and UV */
		yv12_to_yv12(dst[0], dst[1], dst[2],
			dst_stride[0], dst_stride[1],	/* v: dst_stride[2] not yet supported */
			image->y, image->u, image->v, edged_width, edged_width2,
			width, height, (csp & XVID_CSP_VFLIP));
		return 0;

	/* no copy: the caller receives the image's own plane pointers and
	 * strides */
	case XVID_CSP_INTERNAL :
		dst[0] = image->y;
		dst[1] = image->u;
		dst[2] = image->v;
		dst_stride[0] = edged_width;
		dst_stride[1] = edged_width/2;
		dst_stride[2] = edged_width/2;
		return 0;

	/* nothing is written for these two */
	case XVID_CSP_NULL:
	case XVID_CSP_SLICE:
		return 0;

	}

	/* unsupported colorspace */
	return -1;
}
766 
767 float
image_psnr(IMAGE * orig_image,IMAGE * recon_image,uint16_t stride,uint16_t width,uint16_t height)768 image_psnr(IMAGE * orig_image,
769 		   IMAGE * recon_image,
770 		   uint16_t stride,
771 		   uint16_t width,
772 		   uint16_t height)
773 {
774 	int32_t diff, x, y, quad = 0;
775 	uint8_t *orig = orig_image->y;
776 	uint8_t *recon = recon_image->y;
777 	float psnr_y;
778 
779 	for (y = 0; y < height; y++) {
780 		for (x = 0; x < width; x++) {
781 			diff = *(orig + x) - *(recon + x);
782 			quad += diff * diff;
783 		}
784 		orig += stride;
785 		recon += stride;
786 	}
787 
788 	psnr_y = (float) quad / (float) (width * height);
789 
790 	if (psnr_y) {
791 		psnr_y = (float) (255 * 255) / psnr_y;
792 		psnr_y = 10 * (float) log10(psnr_y);
793 	} else
794 		psnr_y = (float) 99.99;
795 
796 	return psnr_y;
797 }
798 
799 
sse_to_PSNR(long sse,int pixels)800 float sse_to_PSNR(long sse, int pixels)
801 {
802         if (sse==0)
803                 return 99.99F;
804 
805         return 48.131F - 10*(float)log10((float)sse/(float)(pixels));   /* log10(255*255)=4.8131 */
806 
807 }
808 
/*
 * Sum of squared errors between two planes.
 *
 * orig, recon   : planes to compare
 * stride        : row stride of both planes
 * width, height : size of the compared area in pixels
 *
 * Full 8x8 blocks go through sse8_8bit(); the columns and rows left over
 * when width/height are not multiples of 8 are summed pixel by pixel.
 */
long plane_sse(uint8_t *orig,
			   uint8_t *recon,
			   uint16_t stride,
			   uint16_t width,
			   uint16_t height)
{
	const int full_w = width  & (~0x07);
	const int full_h = height & (~0x07);
	long total = 0;
	int row, col, line;

	/* bands of 8 rows */
	for (row = 0; row < full_h; row += 8) {

		/* whole 8x8 blocks of the band */
		for (col = 0; col < full_w; col += 8)
			total += sse8_8bit(orig  + col, recon + col, stride);

		/* leftover columns, 8 rows each */
		for (col = full_w; col < width; col++) {
			for (line = 0; line < 8; line++) {
				const int d = *(orig + line*stride + col) - *(recon + line*stride + col);
				total += d * d;
			}
		}

		orig  += 8*stride;
		recon += 8*stride;
	}

	/* leftover rows below the last full band */
	for (row = full_h; row < height; row++) {
		for (col = 0; col < width; col++) {
			const int d = orig[col] - recon[col];
			total += d * d;
		}
		orig += stride;
		recon += stride;
	}

	return total;
}
868 
/*
 * Fill pMB->rel_var8[0..5] for every macroblock of the picture: entries 0-3
 * belong to the four 8x8 luma blocks, entry 4 to the U block and entry 5 to
 * the V block.
 *
 * orig_image         : picture to analyse
 * stride             : luma row stride
 * mbs                : macroblock array, indexed as x + y*mb_width
 * mb_width/mb_height : picture size in macroblocks
 *
 * Each entry is ((sum of the four var4 terms) << 8) divided by
 * (lsquare - lsum*lsum), or 64 when that divisor is zero. The values come
 * from blocksum8(); presumably sums[]/squares[] hold the pixel sums and
 * squared sums of the four 4x4 sub-blocks and the return value is the sum
 * over the whole 8x8 block -- TODO confirm against blocksum8.
 */
void image_block_variance(IMAGE * orig_image,
				          uint16_t stride,
				          MACROBLOCK *mbs,
				          uint16_t mb_width,
				          uint16_t mb_height)
{
	DECLARE_ALIGNED_MATRIX(sums, 1, 4, uint16_t, CACHE_LINE);
	DECLARE_ALIGNED_MATRIX(squares, 1, 4, uint32_t, CACHE_LINE);

	int x, y, i, j;
	uint8_t *orig_y = orig_image->y;
	uint8_t *orig_u = orig_image->u;
	uint8_t *orig_v = orig_image->v;

	for (y = 0; y < mb_height; y++) {
		for (x = 0; x < mb_width; x++) {
			MACROBLOCK *pMB = &mbs[x + y * mb_width];
			uint32_t var4[4];
			/* accumulated below but never read in this function */
			uint32_t sum = 0, square = 0;

			/* y-blocks */
			for (j = 0; j < 2; j++) {
				for (i = 0; i < 2; i++) {
					/* block (i,j) of the 16x16 macroblock at (x,y) */
					int lsum = blocksum8(orig_y + ((y<<4) + (j<<3))*stride + (x<<4) + (i<<3),
										 stride, sums, squares);
					int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6;

					sum += lsum;
					square += lsquare;

					var4[0] = (squares[0]<<4) - sums[0]*sums[0];
					var4[1] = (squares[1]<<4) - sums[1]*sums[1];
					var4[2] = (squares[2]<<4) - sums[2]*sums[2];
					var4[3] = (squares[3]<<4) - sums[3]*sums[3];

					/* rel_var8 first holds the divisor, then the ratio */
					pMB->rel_var8[j*2 + i] = lsquare - lsum*lsum;
					if (pMB->rel_var8[j*2 + i])
						pMB->rel_var8[j*2 + i] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) /
												 pMB->rel_var8[j*2 + i]; /* 4*(Var(Di)/Var(D)) */
					else
						pMB->rel_var8[j*2 + i] = 64;
				}
			}

			/* u */
			{
				/* NOTE(review): the block offset uses the chroma stride
				 * (stride>>1) but blocksum8 is given the luma stride --
				 * confirm this is intended */
				int lsum = blocksum8(orig_u + (y<<3)*(stride>>1) + (x<<3),
									 stride, sums, squares);
				int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6;

				sum += lsum;
				square += lsquare;

				var4[0] = (squares[0]<<4) - sums[0]*sums[0];
				var4[1] = (squares[1]<<4) - sums[1]*sums[1];
				var4[2] = (squares[2]<<4) - sums[2]*sums[2];
				var4[3] = (squares[3]<<4) - sums[3]*sums[3];

				pMB->rel_var8[4] = lsquare - lsum*lsum;
				if (pMB->rel_var8[4])
					pMB->rel_var8[4] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) /
										 pMB->rel_var8[4]; /* 4*(Var(Di)/Var(D)) */
				else
					pMB->rel_var8[4] = 64;
			}

			/* v */
			{
				/* NOTE(review): same stride question as for the u block */
				int lsum = blocksum8(orig_v + (y<<3)*(stride>>1) + (x<<3),
									 stride, sums, squares);
				int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6;

				sum += lsum;
				square += lsquare;

				var4[0] = (squares[0]<<4) - sums[0]*sums[0];
				var4[1] = (squares[1]<<4) - sums[1]*sums[1];
				var4[2] = (squares[2]<<4) - sums[2]*sums[2];
				var4[3] = (squares[3]<<4) - sums[3]*sums[3];

				pMB->rel_var8[5] = lsquare - lsum*lsum;
				if (pMB->rel_var8[5])
					pMB->rel_var8[5] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) /
										 pMB->rel_var8[5]; /* 4*(Var(Di)/Var(D)) */
				else
					pMB->rel_var8[5] = 64;
			}

		}
	}
}
960 
961 #if 0
962 
963 #include <stdio.h>
964 #include <string.h>
965 
/*
 * Write an 8-bit single-plane buffer to `filename` as a binary PGM ("P5").
 *
 *   bmp      - first byte of the plane; width*height contiguous bytes are written
 *   width    - number of bytes per row
 *   height   - number of rows
 *   filename - path of the file to create/overwrite
 *
 * Returns 0 on success, -1 if the file cannot be opened or if writing or
 * closing it fails.
 */
int image_dump_pgm(uint8_t * bmp, uint32_t width, uint32_t height, char * filename)
{
	const size_t total = (size_t) width * height;
	FILE * f;
	int failed;

	f = fopen(filename, "wb");
	if ( f == NULL)
	{
		return -1;
	}

	/* width/height are unsigned, so print them with %u */
	failed = fprintf(f, "P5\n#xvid\n%u %u\n255\n",
					 (unsigned int) width, (unsigned int) height) < 0;

	/* byte-granular write so that an empty plane (total == 0) is not an error */
	if (!failed)
		failed = (fwrite(bmp, 1, total, f) != total);

	/* fclose flushes buffered data, so its result matters as well */
	if (fclose(f) != 0)
		failed = 1;

	return failed ? -1 : 0;
}
983 
984 
985 /* dump image+edges to yuv pgm files */
986 
987 int image_dump(IMAGE * image, uint32_t edged_width, uint32_t edged_height, char * path, int number)
988 {
989 	char filename[1024];
990 
991 	sprintf(filename, "%s_%i_%c.pgm", path, number, 'y');
992 	image_dump_pgm(
993 		image->y - (EDGE_SIZE * edged_width + EDGE_SIZE),
994 		edged_width, edged_height, filename);
995 
996 	sprintf(filename, "%s_%i_%c.pgm", path, number, 'u');
997 	image_dump_pgm(
998 		image->u - (EDGE_SIZE2 * edged_width / 2 + EDGE_SIZE2),
999 		edged_width / 2, edged_height / 2, filename);
1000 
1001 	sprintf(filename, "%s_%i_%c.pgm", path, number, 'v');
1002 	image_dump_pgm(
1003 		image->v - (EDGE_SIZE2 * edged_width / 2 + EDGE_SIZE2),
1004 		edged_width / 2, edged_height / 2, filename);
1005 
1006 	return 0;
1007 }
1008 #endif
1009 
1010 
1011 
1012 /* dump image to yuvpgm file */
1013 
1014 #include <stdio.h>
1015 
1016 int
image_dump_yuvpgm(const IMAGE * image,const uint32_t edged_width,const uint32_t width,const uint32_t height,char * filename)1017 image_dump_yuvpgm(const IMAGE * image,
1018 				  const uint32_t edged_width,
1019 				  const uint32_t width,
1020 				  const uint32_t height,
1021 				  char *filename)
1022 {
1023 	FILE *f;
1024 	char hdr[1024];
1025 	uint32_t i;
1026 	uint8_t *bmp1;
1027 	uint8_t *bmp2;
1028 
1029 
1030 	f = fopen(filename, "wb");
1031 	if (f == NULL) {
1032 		return -1;
1033 	}
1034 	sprintf(hdr, "P5\n#xvid\n%i %i\n255\n", width, (3 * height) / 2);
1035 	fwrite(hdr, strlen(hdr), 1, f);
1036 
1037 	bmp1 = image->y;
1038 	for (i = 0; i < height; i++) {
1039 		fwrite(bmp1, width, 1, f);
1040 		bmp1 += edged_width;
1041 	}
1042 
1043 	bmp1 = image->u;
1044 	bmp2 = image->v;
1045 	for (i = 0; i < height / 2; i++) {
1046 		fwrite(bmp1, width / 2, 1, f);
1047 		fwrite(bmp2, width / 2, 1, f);
1048 		bmp1 += edged_width / 2;
1049 		bmp2 += edged_width / 2;
1050 	}
1051 
1052 	fclose(f);
1053 	return 0;
1054 }
1055 
1056 
1057 float
image_mad(const IMAGE * img1,const IMAGE * img2,uint32_t stride,uint32_t width,uint32_t height)1058 image_mad(const IMAGE * img1,
1059 		  const IMAGE * img2,
1060 		  uint32_t stride,
1061 		  uint32_t width,
1062 		  uint32_t height)
1063 {
1064 	const uint32_t stride2 = stride / 2;
1065 	const uint32_t width2 = width / 2;
1066 	const uint32_t height2 = height / 2;
1067 
1068 	uint32_t x, y;
1069 	uint32_t sum = 0;
1070 
1071 	for (y = 0; y < height; y++)
1072 		for (x = 0; x < width; x++)
1073 			sum += abs(img1->y[x + y * stride] - img2->y[x + y * stride]);
1074 
1075 	for (y = 0; y < height2; y++)
1076 		for (x = 0; x < width2; x++)
1077 			sum += abs(img1->u[x + y * stride2] - img2->u[x + y * stride2]);
1078 
1079 	for (y = 0; y < height2; y++)
1080 		for (x = 0; x < width2; x++)
1081 			sum += abs(img1->v[x + y * stride2] - img2->v[x + y * stride2]);
1082 
1083 	return (float) sum / (width * height * 3 / 2);
1084 }
1085 
1086 void
output_slice(IMAGE * cur,int stride,int width,xvid_image_t * out_frm,int mbx,int mby,int mbl)1087 output_slice(IMAGE * cur, int stride, int width, xvid_image_t* out_frm, int mbx, int mby,int mbl) {
1088   uint8_t *dY,*dU,*dV,*sY,*sU,*sV;
1089   int stride2 = stride >> 1;
1090   int w = mbl << 4, w2,i;
1091 
1092   if(w > width)
1093     w = width;
1094   w2 = w >> 1;
1095 
1096   dY = (uint8_t*)out_frm->plane[0] + (mby << 4) * out_frm->stride[0] + (mbx << 4);
1097   dU = (uint8_t*)out_frm->plane[1] + (mby << 3) * out_frm->stride[1] + (mbx << 3);
1098   dV = (uint8_t*)out_frm->plane[2] + (mby << 3) * out_frm->stride[2] + (mbx << 3);
1099   sY = cur->y + (mby << 4) * stride + (mbx << 4);
1100   sU = cur->u + (mby << 3) * stride2 + (mbx << 3);
1101   sV = cur->v + (mby << 3) * stride2 + (mbx << 3);
1102 
1103   for(i = 0 ; i < 16 ; i++) {
1104     memcpy(dY,sY,w);
1105     dY += out_frm->stride[0];
1106     sY += stride;
1107   }
1108   for(i = 0 ; i < 8 ; i++) {
1109     memcpy(dU,sU,w2);
1110     dU += out_frm->stride[1];
1111     sU += stride2;
1112   }
1113   for(i = 0 ; i < 8 ; i++) {
1114     memcpy(dV,sV,w2);
1115     dV += out_frm->stride[2];
1116     sV += stride2;
1117   }
1118 }
1119 
1120 
1121 void
image_clear(IMAGE * img,int width,int height,int edged_width,int y,int u,int v)1122 image_clear(IMAGE * img, int width, int height, int edged_width,
1123 					int y, int u, int v)
1124 {
1125 	uint8_t * p;
1126 	int i;
1127 
1128 	p = img->y;
1129 	for (i = 0; i < height; i++) {
1130 		memset(p, y, width);
1131 		p += edged_width;
1132 	}
1133 
1134 	p = img->u;
1135 	for (i = 0; i < height/2; i++) {
1136 		memset(p, u, width/2);
1137 		p += edged_width/2;
1138 	}
1139 
1140 	p = img->v;
1141 	for (i = 0; i < height/2; i++) {
1142 		memset(p, v, width/2);
1143 		p += edged_width/2;
1144 	}
1145 }
1146 
1147 /****************************************************************************/
1148 
/* Deinterlacing kernel called by xvid_image_deinterlace(). Starts out as 0
 * (unset) and is assigned on the first call of that function. */
static void (*deintl_core)(uint8_t *, int width, int height, const int stride) = 0;
/* Alternative kernel with the same signature; only declared here.
 * xvid_image_deinterlace() selects it on IA32/x86-64 builds when
 * check_cpu_features() reports XVID_CPU_MMX. */
extern void xvid_deinterlace_sse(uint8_t *, int width, int height, const int stride);
1151 
/* Saturate an int to 0..255. Plain comparisons, so the result does not depend
 * on how the compiler right-shifts negative values. The argument is evaluated
 * more than once: pass a side-effect-free expression. */
#define CLIP_255(x)   ( (x) < 0 ? 0 : ((x) > 255 ? 255 : (x)) )

/*
 * Plain C deinterlacing kernel: filters every second row of one plane in place.
 *
 *   pix    - first sample of the first row (row 0)
 *   width  - number of columns to process
 *   height - number of rows in the plane
 *   bps    - distance in bytes between consecutive rows; may be negative to
 *            walk the plane from its last row upwards
 *
 * Rows 1, 3, 5, ... (height/2 rows in total) are rewritten; rows 0, 2, 4, ...
 * are only read. Each rewritten sample becomes
 *
 *   avg(row above, row below) + (own value - avg(row-2, row+2)) / 4
 *
 * clamped to 0..255. The averages round up and the quarter term is an
 * arithmetic shift. At the borders the missing neighbours are replaced:
 * row 1 uses its own value for "row-2"; the last rewritten row uses the row
 * above for "row below" and its own value for "row+2".
 *
 * Planes with fewer than two rows are left untouched, because row 1 does not
 * exist there and touching it would be an out-of-bounds access.
 */
static void deinterlace_c(uint8_t *pix, int width, int height, const int bps)
{
  if (height < 2)
    return;

  pix += bps;                       /* start on row 1 */
  while(width-->0)
  {
    int p1 = pix[-bps];             /* row above the current output row */
    int p2 = pix[0];                /* original value of the current output row */
    int p0 = p2;                    /* stands in for "row-2" on the first output row */
    int j = (height>>1) - 1;        /* output rows that still have two rows below them */
    int V;
    unsigned char *P = pix++;
    while(j-->0)
    {
      const int  p3 = P[  bps];     /* row below */
      const int  p4 = P[2*bps];     /* next output row, still unfiltered */
      V =  ((p1+p3+1)>>1) + ((p2 - ((p0+p4+1)>>1)) >> 2);
      P[0] = CLIP_255( V );
      /* slide the window two rows further, keeping the unfiltered values */
      p0 = p2;
      p1 = p3;
      p2 = p4;
      P += 2*bps;
    }
    /* last output row: nothing below it, so reuse p1 and p2 */
    V =  ((p1+p1+1)>>1) + ((p2 - ((p0+p2+1)>>1)) >> 2);
    P[0] = CLIP_255( V );
  }
}
#undef CLIP_255
1181 
/*
 * Deinterlace the three planes of `img` in place.
 *
 *   img          - image to process; only the PLANAR, I420 and YV12
 *                  colourspaces are handled
 *   width        - width of plane 0; planes 1 and 2 are processed at width/2
 *   height       - height of plane 0 (must be even); planes 1 and 2 are
 *                  processed at height/2
 *   bottom_first - when non-zero, each plane is walked from its last row
 *                  upwards using a negated stride
 *
 * Returns 1 when the image was processed, 0 when nothing was done (odd
 * height or unsupported colourspace).
 */
int xvid_image_deinterlace(xvid_image_t* img, int width, int height, int bottom_first)
{
	int plane;

	if (height&1)
		return 0;
	if (!(img->csp==XVID_CSP_PLANAR || img->csp==XVID_CSP_I420 || img->csp==XVID_CSP_YV12))
		return 0;       /* not yet supported */

	/* first call: choose the kernel, starting from the C implementation */
	if (deintl_core==0) {
		deintl_core = deinterlace_c;
#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
		if (check_cpu_features() & XVID_CPU_MMX)
			deintl_core = xvid_deinterlace_sse;
#endif
	}

	for (plane = 0; plane < 3; plane++) {
		/* plane 0 is full size, planes 1 and 2 are half size in both directions */
		const int w = (plane == 0) ? width  : (width>>1);
		const int h = (plane == 0) ? height : (height>>1);

		if (bottom_first)
			deintl_core((uint8_t *)img->plane[plane] + (h-1)*img->stride[plane],
						w, h, -img->stride[plane]);
		else
			deintl_core(img->plane[plane], w, h, img->stride[plane]);
	}
	emms();

	return 1;
}
1212 
1213