1 /*
2  *			GPAC - Multimedia Framework C SDK
3  *
4  *			Authors: Jean Le Feuvre
5  *			Copyright (c) Telecom ParisTech 2000-2012
6  *					All rights reserved
7  *
8  *  This file is part of GPAC / common tools sub-project
9  *
10  *  GPAC is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU Lesser General Public License as published by
12  *  the Free Software Foundation; either version 2, or (at your option)
13  *  any later version.
14  *
15  *  GPAC is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU Lesser General Public License for more details.
19  *
20  *  You should have received a copy of the GNU Lesser General Public
21  *  License along with this library; see the file COPYING.  If not, write to
22  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25 
26 
27 #include <gpac/tools.h>
28 #include <gpac/constants.h>
29 #include <gpac/color.h>
30 
31 #ifndef GPAC_DISABLE_PLAYER
32 
/*
 * Forward declarations of file-local surface writers. Their definitions are
 * not visible in this part of the file.
 * NOTE(review): the trailing flag is spelled swap_up in some prototypes and
 * swap_uv in others - presumably the same flag; confirm against the definitions.
 * NOTE(review): only color_write_yv12_10_to_yuv takes a const GF_Window * -
 * confirm this matches its definition.
 */
static GF_Err color_write_nv12_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_up);
static GF_Err color_write_yv12_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, const GF_Window *_src_wnd, Bool swap_up);
static GF_Err color_write_yuv422_10_to_yuv422(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_up);
static GF_Err color_write_yuv422_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_up);
static GF_Err color_write_yuv444_10_to_yuv444(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_up);
static GF_Err color_write_yuv444_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_up);
static GF_Err color_write_yuv420_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv);
static GF_Err color_write_yuv422_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv);
static GF_Err color_write_yuv444_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv);
static GF_Err color_write_yvyu_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv);
static GF_Err color_write_rgb_to_24(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd);
static GF_Err color_write_rgb_to_32(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd);
45 
46 
colmask(s32 a,s32 n)47 static GFINLINE u8 colmask(s32 a, s32 n)
48 {
49 	s32 mask = (1 << n) - 1;
50 	return (u8) (a & (0xff & ~mask)) | ((-((a >> n) & 1)) & mask);
51 }
52 
53 
54 /* YUV -> RGB conversion loading two lines at each call */
55 
/* Clamps a to [0, 255] through the MIN/MAX macros (defined outside this view;
 * presumably a is evaluated more than once - avoid side effects in the argument). */
#define col_clip(a) MAX(0, MIN(255, a))
/* Number of fractional bits used by the fixed-point conversion coefficients. */
#define SCALEBITS_OUT	13
/* Scales a floating-point coefficient by 2^SCALEBITS_OUT, adds 0.5 and truncates to unsigned short. */
#define FIX_OUT(x)		((unsigned short) ((x) * (1L<<SCALEBITS_OUT) + 0.5))

/* Fixed-point lookup tables with one entry per 8-bit sample value; filled by yuv2rgb_init(). */
static s32 RGB_Y[256];
static s32 B_U[256];
static s32 G_U[256];
static s32 G_V[256];
static s32 R_V[256];


/* Non-zero once yuv2rgb_init() has been entered; makes later calls return immediately. */
static s32 yuv2rgb_is_init = 0;
yuv2rgb_init(void)68 static void yuv2rgb_init(void)
69 {
70 	s32 i;
71 	if (yuv2rgb_is_init) return;
72 	yuv2rgb_is_init = 1;
73 
74 	for(i = 0; i < 256; i++) {
75 		RGB_Y[i] = FIX_OUT(1.164) * (i - 16);
76 		B_U[i] = FIX_OUT(2.018) * (i - 128);
77 		G_U[i] = FIX_OUT(0.391) * (i - 128);
78 		G_V[i] = FIX_OUT(0.813) * (i - 128);
79 		R_V[i] = FIX_OUT(1.596) * (i - 128);
80 	}
81 }
82 
/*
 * Expands two adjacent rows of 8-bit planar YUV into two rows of 4-byte pixels.
 * One U and one V sample are consumed per pixel pair and shared by the 2x2
 * block formed by that pair on both rows.
 *
 * dst         first output row; the second row is written at dst + dststride
 * y_src       first luma row; the second row is read at y_src + y_stride
 * u_src/v_src chroma rows, indexed once per pixel pair
 * uv_stride   not used by this loader
 * width       pixel count; width / 2 pairs are processed
 * dst_yuv     when set, pixels are stored unconverted as V, U, Y, 0xFF;
 *             otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * The lookup tables are not initialised here.
 * NOTE(review): presumably a caller runs yuv2rgb_init() beforehand - confirm.
 */
static void yuv_load_lines_planar(unsigned char *dst, s32 dststride, unsigned char *y_src, unsigned char *u_src, unsigned char * v_src, s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	unsigned char *out_top = dst;
	unsigned char *out_bot = dst + dststride;
	unsigned char *luma_top = y_src;
	unsigned char *luma_bot = y_src + y_stride;
	const u32 pair_count = width / 2;
	u32 pair, px;

	if (dst_yuv) {
		for (pair = 0; pair < pair_count; pair++) {
			out_top[0] = out_top[4] = out_bot[0] = out_bot[4] = v_src[pair];
			out_top[1] = out_top[5] = out_bot[1] = out_bot[5] = u_src[pair];

			for (px = 0; px < 2; px++) {
				out_top[4 * px + 2] = luma_top[px];
				out_top[4 * px + 3] = 0xFF;
			}
			for (px = 0; px < 2; px++) {
				out_bot[4 * px + 2] = luma_bot[px];
				out_bot[4 * px + 3] = 0xFF;
			}

			luma_top += 2;
			luma_bot += 2;
			out_top += 8;
			out_bot += 8;
		}
		return;
	}

	for (pair = 0; pair < pair_count; pair++) {
		const s32 chroma_u = u_src[pair];
		const s32 chroma_v = v_src[pair];
		/* chroma contributions are shared by the four pixels of the block */
		const s32 blue_off = B_U[chroma_u];
		const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
		const s32 red_off = R_V[chroma_v];

		for (px = 0; px < 2; px++) {
			const s32 luma = RGB_Y[luma_top[px]];
			unsigned char *out = out_top + 4 * px;

			out[0] = col_clip( (luma + red_off) >> SCALEBITS_OUT);
			out[1] = col_clip( (luma - green_off) >> SCALEBITS_OUT);
			out[2] = col_clip( (luma + blue_off) >> SCALEBITS_OUT);
			out[3] = 0xFF;
		}
		for (px = 0; px < 2; px++) {
			const s32 luma = RGB_Y[luma_bot[px]];
			unsigned char *out = out_bot + 4 * px;

			out[0] = col_clip( (luma + red_off) >> SCALEBITS_OUT);
			out[1] = col_clip( (luma - green_off) >> SCALEBITS_OUT);
			out[2] = col_clip( (luma + blue_off) >> SCALEBITS_OUT);
			out[3] = 0xFF;
		}

		luma_top += 2;
		luma_bot += 2;
		out_top += 8;
		out_bot += 8;
	}
}
/*
 * Expands two adjacent rows of 8-bit planar YUV into two rows of 4-byte pixels.
 * Each row has its own chroma row (second row at u_src/v_src + uv_stride), and
 * one U/V sample per row is shared by two horizontally adjacent pixels.
 *
 * dst         first output row; the second row is written at dst + dststride
 * y_src       first luma row; the second row is read at y_src + y_stride
 * u_src/v_src first chroma rows; the second rows are read at + uv_stride
 * width       pixel count; width / 2 pairs are processed
 * dst_yuv     when set, pixels are stored unconverted as V, U, Y, 0xFF;
 *             otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * The lookup tables are not initialised here.
 * NOTE(review): presumably a caller runs yuv2rgb_init() beforehand - confirm.
 */
static void yuv422_load_lines_planar(unsigned char *dst, s32 dststride, unsigned char *y_src, unsigned char *u_src, unsigned char * v_src, s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	u32 hw, x;
	unsigned char *dst2 = (unsigned char *)dst + dststride;
	unsigned char *y_src2 = (unsigned char *)y_src + y_stride;
	unsigned char *u_src2 = (unsigned char *)u_src + uv_stride;
	unsigned char *v_src2 = (unsigned char *)v_src + uv_stride;

	hw = width / 2;
	if (dst_yuv) {
		for (x = 0; x < hw; x++) {
			/* first row */
			dst[0] = dst[4] = *v_src;
			dst[1] = dst[5] = *u_src;
			dst[2] = y_src[0];
			dst[3] = 0xFF;
			dst[6] = y_src[1];
			dst[7] = 0xFF;
			y_src += 2;
			u_src++;
			v_src++;

			/* second row: luma is taken from the second source row (y_src2),
			 * matching the pointer that is advanced */
			dst2[0] = dst2[4] = *v_src2;
			dst2[1] = dst2[5] = *u_src2;
			dst2[2] = y_src2[0];
			dst2[3] = 0xFF;
			dst2[6] = y_src2[1];
			dst2[7] = 0xFF;
			y_src2 += 2;
			u_src2++;
			v_src2++;

			dst += 8;
			dst2 += 8;
		}
		return;
	}

	for (x = 0; x < hw; x++) {
		s32 b_u, g_uv, r_v, rgb_y;

		/* first row: chroma terms shared by the two pixels of the pair */
		b_u = B_U[*u_src];
		g_uv = G_U[*u_src] + G_V[*v_src];
		r_v = R_V[*v_src];
		rgb_y = RGB_Y[*y_src];
		dst[0] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst[1] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst[2] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst[3] = 0xFF;
		y_src++;

		rgb_y = RGB_Y[*y_src];
		dst[4] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst[5] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst[6] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst[7] = 0xFF;
		y_src++;
		u_src++;
		v_src++;

		/* second row uses its own chroma samples */
		b_u = B_U[*u_src2];
		g_uv = G_U[*u_src2] + G_V[*v_src2];
		r_v = R_V[*v_src2];
		rgb_y = RGB_Y[*y_src2];
		dst2[0] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst2[1] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst2[2] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst2[3] = 0xFF;
		y_src2++;

		rgb_y = RGB_Y[*y_src2];
		dst2[4] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst2[5] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst2[6] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst2[7] = 0xFF;
		y_src2++;
		u_src2++;
		v_src2++;

		dst += 8;
		dst2 += 8;
	}
}
/*
 * Expands two adjacent rows of 8-bit planar YUV into two rows of 4-byte pixels.
 * Every pixel has its own Y, U and V sample.
 *
 * dst         first output row; the second row is written at dst + dststride
 * y_src       first luma row; the second row is read at y_src + y_stride
 * u_src/v_src first chroma rows; the second rows are read at + uv_stride
 * width       pixel count; pixels are handled in pairs, width / 2 pairs per row
 * dst_yuv     when set, pixels are stored unconverted as V, U, Y, 0xFF;
 *             otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * The lookup tables are not initialised here.
 * NOTE(review): presumably a caller runs yuv2rgb_init() beforehand - confirm.
 */
static void yuv444_load_lines_planar(unsigned char *dst, s32 dststride, unsigned char *y_src, unsigned char *u_src, unsigned char * v_src, s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	unsigned char *out_top = dst;
	unsigned char *out_bot = dst + dststride;
	unsigned char *y_bot = y_src + y_stride;
	unsigned char *u_bot = u_src + uv_stride;
	unsigned char *v_bot = v_src + uv_stride;
	const u32 pair_count = width / 2;
	u32 pair, px;

	if (dst_yuv) {
		for (pair = 0; pair < pair_count; pair++) {
			for (px = 0; px < 2; px++) {
				out_top[0] = *v_src++;
				out_top[1] = *u_src++;
				out_top[2] = *y_src++;
				out_top[3] = 0xFF;
				out_top += 4;
			}
			for (px = 0; px < 2; px++) {
				out_bot[0] = *v_bot++;
				out_bot[1] = *u_bot++;
				out_bot[2] = *y_bot++;
				out_bot[3] = 0xFF;
				out_bot += 4;
			}
		}
		return;
	}

	for (pair = 0; pair < pair_count; pair++) {
		for (px = 0; px < 2; px++) {
			const s32 chroma_u = *u_src++;
			const s32 chroma_v = *v_src++;
			const s32 blue_off = B_U[chroma_u];
			const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
			const s32 red_off = R_V[chroma_v];
			const s32 luma = RGB_Y[*y_src++];

			out_top[0] = col_clip((luma + red_off) >> SCALEBITS_OUT);
			out_top[1] = col_clip((luma - green_off) >> SCALEBITS_OUT);
			out_top[2] = col_clip((luma + blue_off) >> SCALEBITS_OUT);
			out_top[3] = 0xFF;
			out_top += 4;
		}
		for (px = 0; px < 2; px++) {
			const s32 chroma_u = *u_bot++;
			const s32 chroma_v = *v_bot++;
			const s32 blue_off = B_U[chroma_u];
			const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
			const s32 red_off = R_V[chroma_v];
			const s32 luma = RGB_Y[*y_bot++];

			out_bot[0] = col_clip((luma + red_off) >> SCALEBITS_OUT);
			out_bot[1] = col_clip((luma - green_off) >> SCALEBITS_OUT);
			out_bot[2] = col_clip((luma + blue_off) >> SCALEBITS_OUT);
			out_bot[3] = 0xFF;
			out_bot += 4;
		}
	}
}
359 
/*
 * Expands two adjacent rows of planar YUV stored as 16-bit samples into two
 * rows of 4-byte pixels. Every sample is shifted right by 2 before use. One
 * U and one V sample are consumed per pixel pair and shared by the 2x2 block
 * formed by that pair on both rows.
 *
 * dst           first output row; the second row is written at dst + dststride
 * _y_src        first luma row; the second row starts y_stride bytes later
 * _u_src/_v_src chroma rows, indexed once per pixel pair
 * uv_stride     not used by this loader
 * width         pixel count; width / 2 pairs are processed
 * dst_yuv       when set, pixels are stored unconverted as V, U, Y, 0xFF;
 *               otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * NOTE(review): the table lookups assume sample >> 2 stays below 256, i.e.
 * samples of at most 10 significant bits - confirm callers guarantee this.
 * The lookup tables are not initialised here.
 */
static void yuv_10_load_lines_planar(unsigned char *dst, s32 dststride, unsigned char *_y_src, unsigned char *_u_src, unsigned char *_v_src, s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	unsigned char *out_top = dst;
	unsigned char *out_bot = dst + dststride;
	unsigned short *luma_bot = (unsigned short *) (_y_src + y_stride);
	unsigned short *luma_top = (unsigned short *) _y_src;
	unsigned short *chroma_u_row = (unsigned short *) _u_src;
	unsigned short *chroma_v_row = (unsigned short *) _v_src;
	const u32 pair_count = width / 2;
	u32 pair, px;

	if (dst_yuv) {
		for (pair = 0; pair < pair_count; pair++) {
			out_top[0] = out_top[4] = out_bot[0] = out_bot[4] = chroma_v_row[pair] >> 2;
			out_top[1] = out_top[5] = out_bot[1] = out_bot[5] = chroma_u_row[pair] >> 2;

			for (px = 0; px < 2; px++) {
				out_top[4 * px + 2] = luma_top[px] >> 2;
				out_top[4 * px + 3] = 0xFF;
			}
			for (px = 0; px < 2; px++) {
				out_bot[4 * px + 2] = luma_bot[px] >> 2;
				out_bot[4 * px + 3] = 0xFF;
			}

			luma_top += 2;
			luma_bot += 2;
			out_top += 8;
			out_bot += 8;
		}
		return;
	}

	for (pair = 0; pair < pair_count; pair++) {
		const s32 chroma_u = chroma_u_row[pair] >> 2;
		const s32 chroma_v = chroma_v_row[pair] >> 2;
		/* chroma contributions are shared by the four pixels of the block */
		const s32 blue_off = B_U[chroma_u];
		const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
		const s32 red_off = R_V[chroma_v];

		for (px = 0; px < 2; px++) {
			const s32 luma = RGB_Y[luma_top[px] >> 2];
			unsigned char *out = out_top + 4 * px;

			out[0] = col_clip( (luma + red_off) >> SCALEBITS_OUT);
			out[1] = col_clip( (luma - green_off) >> SCALEBITS_OUT);
			out[2] = col_clip( (luma + blue_off) >> SCALEBITS_OUT);
			out[3] = 0xFF;
		}
		for (px = 0; px < 2; px++) {
			const s32 luma = RGB_Y[luma_bot[px] >> 2];
			unsigned char *out = out_bot + 4 * px;

			out[0] = col_clip( (luma + red_off) >> SCALEBITS_OUT);
			out[1] = col_clip( (luma - green_off) >> SCALEBITS_OUT);
			out[2] = col_clip( (luma + blue_off) >> SCALEBITS_OUT);
			out[3] = 0xFF;
		}

		luma_top += 2;
		luma_bot += 2;
		out_top += 8;
		out_bot += 8;
	}
}
/*
 * Expands two adjacent rows of planar YUV stored as 16-bit samples into two
 * rows of 4-byte pixels. Every sample is shifted right by 2 before use. Each
 * row has its own chroma row (second row uv_stride bytes later), and one U/V
 * sample per row is shared by two horizontally adjacent pixels.
 *
 * dst           first output row; the second row is written at dst + dststride
 * _y_src        first luma row; the second row starts y_stride bytes later
 * _u_src/_v_src first chroma rows; the second rows start uv_stride bytes later
 * width         pixel count; width / 2 pairs are processed
 * dst_yuv       when set, pixels are stored unconverted as V, U, Y, 0xFF;
 *               otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * NOTE(review): the table lookups assume sample >> 2 stays below 256, i.e.
 * samples of at most 10 significant bits - confirm callers guarantee this.
 * The lookup tables are not initialised here.
 */
static void yuv422_10_load_lines_planar(unsigned char *dst, s32 dststride, unsigned char *_y_src, unsigned char *_u_src, unsigned char *_v_src, s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	u32 hw, x;
	unsigned char *dst2 = (unsigned char *)dst + dststride;
	unsigned short *y_src2 = (unsigned short *)(_y_src + y_stride);
	unsigned short *u_src2 = (unsigned short *)(_u_src + uv_stride);
	unsigned short *v_src2 = (unsigned short *)(_v_src + uv_stride);
	unsigned short *y_src = (unsigned short *)_y_src;
	unsigned short *u_src = (unsigned short *)_u_src;
	unsigned short *v_src = (unsigned short *)_v_src;

	hw = width / 2;

	if (dst_yuv) {
		for (x = 0; x < hw; x++) {
			/* first row */
			dst[0] = dst[4] = *v_src >> 2;
			dst[1] = dst[5] = *u_src >> 2;
			dst[2] = y_src[0] >> 2;
			dst[3] = 0xFF;
			dst[6] = y_src[1] >> 2;
			dst[7] = 0xFF;
			y_src += 2;
			/* first-row chroma advances once per pixel pair, as in the RGB path below */
			u_src++;
			v_src++;

			/* second row: exactly two luma samples and one chroma pair are consumed */
			dst2[0] = dst2[4] = *v_src2 >> 2;
			dst2[1] = dst2[5] = *u_src2 >> 2;
			dst2[2] = y_src2[0] >> 2;
			dst2[3] = 0xFF;
			dst2[6] = y_src2[1] >> 2;
			dst2[7] = 0xFF;
			y_src2 += 2;
			u_src2++;
			v_src2++;

			dst += 8;
			dst2 += 8;
		}
		return;
	}
	for (x = 0; x < hw; x++) {
		s32 b_u, g_uv, r_v, rgb_y;

		/* first row: chroma terms shared by the two pixels of the pair */
		b_u = B_U[*u_src >> 2];
		g_uv = G_U[*u_src >> 2] + G_V[*v_src >> 2];
		r_v = R_V[*v_src >> 2];
		rgb_y = RGB_Y[*y_src >> 2];
		dst[0] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst[1] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst[2] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst[3] = 0xFF;
		y_src++;

		rgb_y = RGB_Y[*y_src >> 2];
		dst[4] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst[5] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst[6] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst[7] = 0xFF;
		y_src++;
		u_src++;
		v_src++;

		/* second row uses its own chroma samples */
		b_u = B_U[*u_src2 >> 2];
		g_uv = G_U[*u_src2 >> 2] + G_V[*v_src2 >> 2];
		r_v = R_V[*v_src2 >> 2];
		rgb_y = RGB_Y[*y_src2 >> 2];
		dst2[0] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst2[1] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst2[2] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst2[3] = 0xFF;
		y_src2++;

		rgb_y = RGB_Y[*y_src2 >> 2];
		dst2[4] = col_clip((rgb_y + r_v) >> SCALEBITS_OUT);
		dst2[5] = col_clip((rgb_y - g_uv) >> SCALEBITS_OUT);
		dst2[6] = col_clip((rgb_y + b_u) >> SCALEBITS_OUT);
		dst2[7] = 0xFF;
		y_src2++;
		u_src2++;
		v_src2++;

		dst += 8;
		dst2 += 8;
	}
}
/*
 * Expands two adjacent rows of planar YUV stored as 16-bit samples into two
 * rows of 4-byte pixels. Every sample is shifted right by 2 before use and
 * every pixel has its own Y, U and V sample.
 *
 * dst           first output row; the second row is written at dst + dststride
 * _y_src        first luma row; the second row starts y_stride bytes later
 * _u_src/_v_src first chroma rows; the second rows start uv_stride bytes later
 * width         pixel count; pixels are handled in pairs, width / 2 pairs per row
 * dst_yuv       when set, pixels are stored unconverted as V, U, Y, 0xFF;
 *               otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * NOTE(review): the table lookups assume sample >> 2 stays below 256, i.e.
 * samples of at most 10 significant bits - confirm callers guarantee this.
 * The lookup tables are not initialised here.
 */
static void yuv444_10_load_lines_planar(unsigned char *dst, s32 dststride, unsigned char *_y_src, unsigned char *_u_src, unsigned char *_v_src, s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	unsigned char *out_top = dst;
	unsigned char *out_bot = dst + dststride;
	unsigned short *y_bot = (unsigned short *)(_y_src + y_stride);
	unsigned short *u_bot = (unsigned short *)(_u_src + uv_stride);
	unsigned short *v_bot = (unsigned short *)(_v_src + uv_stride);
	unsigned short *y_top = (unsigned short *)_y_src;
	unsigned short *u_top = (unsigned short *)_u_src;
	unsigned short *v_top = (unsigned short *)_v_src;
	const u32 pair_count = width / 2;
	u32 pair, px;

	if (dst_yuv) {
		for (pair = 0; pair < pair_count; pair++) {
			for (px = 0; px < 2; px++) {
				out_top[0] = *v_top++ >> 2;
				out_top[1] = *u_top++ >> 2;
				out_top[2] = *y_top++ >> 2;
				out_top[3] = 0xFF;
				out_top += 4;
			}
			for (px = 0; px < 2; px++) {
				out_bot[0] = *v_bot++ >> 2;
				out_bot[1] = *u_bot++ >> 2;
				out_bot[2] = *y_bot++ >> 2;
				out_bot[3] = 0xFF;
				out_bot += 4;
			}
		}
		return;
	}

	for (pair = 0; pair < pair_count; pair++) {
		for (px = 0; px < 2; px++) {
			const s32 chroma_u = *u_top++ >> 2;
			const s32 chroma_v = *v_top++ >> 2;
			const s32 blue_off = B_U[chroma_u];
			const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
			const s32 red_off = R_V[chroma_v];
			const s32 luma = RGB_Y[*y_top++ >> 2];

			out_top[0] = col_clip((luma + red_off) >> SCALEBITS_OUT);
			out_top[1] = col_clip((luma - green_off) >> SCALEBITS_OUT);
			out_top[2] = col_clip((luma + blue_off) >> SCALEBITS_OUT);
			out_top[3] = 0xFF;
			out_top += 4;
		}
		for (px = 0; px < 2; px++) {
			const s32 chroma_u = *u_bot++ >> 2;
			const s32 chroma_v = *v_bot++ >> 2;
			const s32 blue_off = B_U[chroma_u];
			const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
			const s32 red_off = R_V[chroma_v];
			const s32 luma = RGB_Y[*y_bot++ >> 2];

			out_bot[0] = col_clip((luma + red_off) >> SCALEBITS_OUT);
			out_bot[1] = col_clip((luma - green_off) >> SCALEBITS_OUT);
			out_bot[2] = col_clip((luma + blue_off) >> SCALEBITS_OUT);
			out_bot[3] = 0xFF;
			out_bot += 4;
		}
	}
}
643 
/*
 * Expands one row of packed 8-bit YUV into one row of 4-byte pixels.
 * Each group of 4 source bytes describes two pixels: luma is read at y_src and
 * y_src + 2, and one U and one V sample are shared by both pixels. All three
 * source pointers advance by 4 bytes per pixel pair.
 *
 * dst        output row
 * dststride  not used by this loader
 * width      pixel count; width / 2 pairs are processed
 * dst_yuv    when set, pixels are stored unconverted as V, U, Y, 0xFF (the
 *            same byte order the planar loaders in this file produce);
 *            otherwise they go through the lookup tables and are stored as R, G, B, 0xFF
 *
 * The lookup tables are not initialised here.
 * NOTE(review): presumably a caller runs yuv2rgb_init() beforehand - confirm.
 */
static void yuv_load_lines_packed(unsigned char *dst, s32 dststride, unsigned char *y_src, unsigned char *u_src, unsigned char * v_src, s32 width, Bool dst_yuv)
{
	u32 hw;

	hw = width / 2;
	if (dst_yuv) {
		while (hw) {
			hw--;

			/* V first, then U: keeps the layout consistent with every other loader */
			dst[0] = dst[4] = *v_src;
			dst[1] = dst[5] = *u_src;
			dst[2] = *y_src;
			dst[3] = 0xFF;
			dst[6] = *(y_src+2);
			dst[7] = 0xFF;

			dst += 8;
			y_src += 4;
			u_src += 4;
			v_src += 4;
		}
		return;
	}
	while (hw) {
		s32 b_u, g_uv, r_v, rgb_y;
		hw--;

		/* chroma terms shared by the two pixels of the pair */
		b_u = B_U[*u_src];
		g_uv = G_U[*u_src] + G_V[*v_src];
		r_v = R_V[*v_src];

		rgb_y = RGB_Y[*y_src];
		dst[0] = col_clip( (rgb_y + r_v) >> SCALEBITS_OUT);
		dst[1] = col_clip( (rgb_y - g_uv) >> SCALEBITS_OUT);
		dst[2] = col_clip( (rgb_y + b_u) >> SCALEBITS_OUT);
		dst[3] = 0xFF;

		rgb_y = RGB_Y[*(y_src+2)];
		dst[4] = col_clip( (rgb_y + r_v) >> SCALEBITS_OUT);
		dst[5] = col_clip( (rgb_y - g_uv) >> SCALEBITS_OUT);
		dst[6] = col_clip( (rgb_y + b_u) >> SCALEBITS_OUT);
		dst[7] = 0xFF;

		dst += 8;
		y_src += 4;
		u_src += 4;
		v_src += 4;
	}
}
693 
694 
/*
 * Expands two adjacent rows of 8-bit planar YUV plus an alpha plane into two
 * rows of 4-byte pixels. One U and one V sample are consumed per pixel pair
 * and shared by the 2x2 block formed by that pair on both rows; luma and alpha
 * are per pixel. Calls yuv2rgb_init() before doing any work.
 *
 * dst         first output row; the second row is written at dst + dststride
 * y_src       first luma row; the second row is read at y_src + y_stride
 * a_src       first alpha row; the second row is read at a_src + y_stride
 * u_src/v_src chroma rows, indexed once per pixel pair
 * uv_stride   not used by this loader
 * width       pixel count; width / 2 pairs are processed
 * dst_yuv     when set, pixels are stored unconverted as V, U, Y, A;
 *             otherwise they go through the lookup tables and are stored as R, G, B, A
 */
static void yuva_load_lines(unsigned char *dst, s32 dststride, unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, unsigned char *a_src,
                               s32 y_stride, s32 uv_stride, s32 width, Bool dst_yuv)
{
	unsigned char *out_top = dst;
	unsigned char *out_bot = dst + dststride;
	unsigned char *luma_top = y_src;
	unsigned char *luma_bot = y_src + y_stride;
	unsigned char *alpha_top = a_src;
	unsigned char *alpha_bot = a_src + y_stride;
	u32 pair_count, pair, px;

	yuv2rgb_init();

	pair_count = width / 2;
	if (dst_yuv) {
		for (pair = 0; pair < pair_count; pair++) {
			out_top[0] = out_top[4] = out_bot[0] = out_bot[4] = v_src[pair];
			out_top[1] = out_top[5] = out_bot[1] = out_bot[5] = u_src[pair];

			for (px = 0; px < 2; px++) {
				out_top[4 * px + 2] = luma_top[px];
				out_top[4 * px + 3] = alpha_top[px];
			}
			for (px = 0; px < 2; px++) {
				out_bot[4 * px + 2] = luma_bot[px];
				out_bot[4 * px + 3] = alpha_bot[px];
			}

			luma_top += 2;
			alpha_top += 2;
			luma_bot += 2;
			alpha_bot += 2;
			out_top += 8;
			out_bot += 8;
		}
		return;
	}

	for (pair = 0; pair < pair_count; pair++) {
		const s32 chroma_u = u_src[pair];
		const s32 chroma_v = v_src[pair];
		/* chroma contributions are shared by the four pixels of the block */
		const s32 blue_off = B_U[chroma_u];
		const s32 green_off = G_U[chroma_u] + G_V[chroma_v];
		const s32 red_off = R_V[chroma_v];

		for (px = 0; px < 2; px++) {
			const s32 luma = RGB_Y[luma_top[px]];
			unsigned char *out = out_top + 4 * px;

			out[0] = col_clip ( (luma + red_off) >> SCALEBITS_OUT );
			out[1] = col_clip ( (luma - green_off) >> SCALEBITS_OUT );
			out[2] = col_clip ( (luma + blue_off) >> SCALEBITS_OUT );
			out[3] = alpha_top[px];
		}
		for (px = 0; px < 2; px++) {
			const s32 luma = RGB_Y[luma_bot[px]];
			unsigned char *out = out_bot + 4 * px;

			out[0] = col_clip ( (luma + red_off) >> SCALEBITS_OUT );
			out[1] = col_clip ( (luma - green_off) >> SCALEBITS_OUT );
			out[2] = col_clip ( (luma + blue_off) >> SCALEBITS_OUT );
			out[3] = alpha_bot[px];
		}

		luma_top += 2;
		alpha_top += 2;
		luma_bot += 2;
		alpha_bot += 2;
		out_top += 8;
		out_bot += 8;
	}
}
784 
/*
 * Returns ((a + 1) * b) >> 8, i.e. b scaled by a so that a == 255 leaves b
 * unchanged. b may be negative when called with a colour difference.
 */
static s32 mul255(s32 a, s32 b)
{
	const s32 scaled = (a + 1) * b;

	return scaled >> 8;
}
789 
/* Signature shared by the copy_row_* and merge_row_* row writers defined in this file. */
typedef void (*copy_row_proto)(u8 *src, u32 src_w, u8 *_dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height);
/* Signature of a line loader callback.
 * NOTE(review): no function with this signature is visible in this part of the file - confirm its users. */
typedef void (*load_line_proto)(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 src_width, u32 src_height, u8 *dst_bits, Bool dst_yuv);
792 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * 16-bit pixels packed with GF_COL_555.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. x_pitch is the output step in bytes and is halved
 * to step over u16 values. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_rgb_555(u8 *src, u32 src_w, u8 *_dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u16 *out = (u16 *)_dst;
	const s32 step = x_pitch / 2;
	s32 frac = 0x10000;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			*out = GF_COL_555(red, green, blue);
		}
		out += step;
		frac += h_inc;
	}
}
814 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * 16-bit pixels packed with GF_COL_565.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. x_pitch is the output step in bytes and is halved
 * to step over u16 values. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_rgb_565(u8 *src, u32 src_w, u8 *_dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u16 *out = (u16 *)_dst;
	const s32 step = x_pitch / 2;
	s32 frac = 0x10000;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			*out = GF_COL_565(red, green, blue);
		}
		out += step;
		frac += h_inc;
	}
}
836 
837 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * output pixels written as R, G, B bytes, x_pitch bytes apart.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_rgb_24(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 frac = 0x10000;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			dst[0] = red;
			dst[1] = green;
			dst[2] = blue;
		}
		dst += x_pitch;
		frac += h_inc;
	}
}
862 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * output pixels written as B, G, R bytes, x_pitch bytes apart.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_bgr_24(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 frac = 0x10000;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			dst[0] = blue;
			dst[1] = green;
			dst[2] = red;
		}
		dst += x_pitch;
		frac += h_inc;
	}
}
887 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * output pixels written as B, G, R, 0xFF, x_pitch bytes apart.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_bgrx(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 frac = 0x10000L;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			dst[0] = blue;
			dst[1] = green;
			dst[2] = red;
			dst[3] = 0xFF;
		}
		dst += x_pitch;
		frac += h_inc;
	}
}
912 
913 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * output pixels written as 0xFF, R, G, B, x_pitch bytes apart.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_argb(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 frac = 0x10000L;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			dst[0] = 0xFF;
			dst[1] = red;
			dst[2] = green;
			dst[3] = blue;
		}
		dst += x_pitch;
		frac += h_inc;
	}
}
938 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) into dst_w
 * output pixels written as R, G, B, 0xFF, x_pitch bytes apart.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. Output pixels whose source alpha is 0 are left
 * untouched. src_w, alpha, dst_pitch and dst_height are not used.
 */
static void copy_row_rgbx(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 frac = 0x10000L;
	u8 red = 0, green = 0, blue = 0, opacity = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			opacity = src[3];
			src += 4;
		}
		if (opacity) {
			dst[0] = red;
			dst[1] = green;
			dst[2] = blue;
			dst[3] = 0xFF;
		}
		dst += x_pitch;
		frac += h_inc;
	}
}
963 
/*
 * Stretches one row of 4-byte source pixels into dst_w output pixels, copying
 * all four bytes in source order, x_pitch bytes apart.
 * The source position advances by h_inc per output pixel, where 0x10000 is
 * one whole source pixel. Unlike the other copy_row_* writers, every output
 * pixel is written regardless of the fourth source byte. src_w, alpha,
 * dst_pitch and dst_height are not used.
 */
static void copy_row_rgbd(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 frac = 0x10000L;
	u8 red = 0, green = 0, blue = 0, fourth = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			red = src[0];
			green = src[1];
			blue = src[2];
			fourth = src[3];
			src += 4;
		}
		dst[0] = red;
		dst[1] = green;
		dst[2] = blue;
		dst[3] = fourth;

		dst += x_pitch;
		frac += h_inc;
	}
}
/* Compiled out by the surrounding #if 0. */
#if 0
/*
 * Stretches one row of 4-byte source pixels (read as V, U, Y, A) into three
 * output planes: Y at dst, U at dst + dst_height * dst_pitch, and V one more
 * plane of the same size after U. Output pixels whose source alpha is 0 are
 * left untouched. src_w, x_pitch and alpha are not used.
 */
static void copy_row_yuv444(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	s32 pos;
	u8 *dY, *dU, *dV;
	u8 a=0, y=0, u=0, v=0;

	dY = dst;
	dU = dst + dst_height * dst_pitch;
	dV = dU + dst_height * dst_pitch;

	/* 0x10000 is one whole source pixel; start by loading the first one */
	pos = 0x10000;
	while (dst_w) {
		while ( pos >= 0x10000L ) {
			v = *src++;
			u = *src++;
			y = *src++;
			a = *src++;
			pos -= 0x10000L;
		}
		if (a) {
			*dV = v;
			*dU = u;
			*dY = y;
		}
		dY++;
		dU++;
		dV++;

		pos += h_inc;
		dst_w--;
	}
}
#endif
1021 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) and blends it
 * over dst_w existing 16-bit pixels, writing the result with GF_COL_555.
 * The per-pixel coverage is the source alpha scaled by the global alpha
 * through mul255(). The destination channels are expanded to 8 bits with
 * colmask() before blending. Nothing is written when the coverage or the
 * global alpha is 0. The source position advances by h_inc per output pixel,
 * where 0x10000 is one whole source pixel. x_pitch is the output step in
 * bytes and is halved to step over u16 values. src_w, dst_pitch and
 * dst_height are not used.
 */
static void merge_row_rgb_555(u8 *src, u32 src_w, u8 *_dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u16 *out = (u16 *)_dst;
	const s32 step = x_pitch / 2;
	s32 frac = 0x10000;
	u32 src_r = 0, src_g = 0, src_b = 0, cover = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			src_r = src[0];
			src_g = src[1];
			src_b = src[2];
			cover = mul255(src[3], alpha);
			src += 4;
		}
		if (cover && alpha) {
			const u16 packed = *out;
			u32 bg_r = colmask(packed >> (10 - 3), 3);
			u32 bg_g = colmask(packed >> (5 - 3), 3);
			u32 bg_b = colmask(packed << 3, 3);

			/* background + coverage * (source - background), per channel */
			bg_r = mul255(cover, src_r - bg_r) + bg_r;
			bg_g = mul255(cover, src_g - bg_g) + bg_g;
			bg_b = mul255(cover, src_b - bg_b) + bg_b;
			*out = GF_COL_555(bg_r, bg_g, bg_b);
		}
		out += step;
		frac += h_inc;
	}
}
1054 
/*
 * Stretches one row of 4-byte source pixels (read as R, G, B, A) and blends it
 * over dst_w existing 16-bit pixels, writing the result with GF_COL_565.
 * The per-pixel coverage is the source alpha scaled by the global alpha
 * through mul255(). The destination channels are extracted with masks
 * 0xf8 / 0xfc / 0xf8 before blending. Nothing is written when the coverage
 * is 0. The source position advances by h_inc per output pixel, where 0x10000
 * is one whole source pixel. x_pitch is the output step in bytes and is
 * halved to step over u16 values. src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_rgb_565(u8 *src, u32 src_w, u8 *_dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u16 *out = (u16 *)_dst;
	const s32 step = x_pitch / 2;
	s32 frac = 0x10000;
	u32 src_r = 0, src_g = 0, src_b = 0, cover = 0;
	u32 remaining;

	for (remaining = dst_w; remaining; remaining--) {
		/* consume one source pixel per whole unit accumulated */
		for (; frac >= 0x10000L; frac -= 0x10000L) {
			src_r = src[0];
			src_g = src[1];
			src_b = src[2];
			cover = mul255(src[3], alpha);
			src += 4;
		}
		if (cover) {
			const u16 packed = *out;
			u32 bg_r = (packed >> 8) & 0xf8;
			u32 bg_g = (packed >> 3) & 0xfc;
			u32 bg_b = (packed << 3) & 0xf8;

			/* background + coverage * (source - background), per channel */
			bg_r = mul255(cover, src_r - bg_r) + bg_r;
			bg_g = mul255(cover, src_g - bg_g) + bg_g;
			bg_b = mul255(cover, src_b - bg_b) + bg_b;
			*out = GF_COL_565(bg_r, bg_g, bg_b);
		}
		out += step;
		frac += h_inc;
	}
}
1086 
1087 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a destination line whose pixels store R, G, B in their
 * first three bytes.
 *
 * The source alpha is modulated by the global alpha once per fetched pixel;
 * destination pixels are skipped while the resulting alpha is 0.
 * Scaling uses a 16.16 fixed-point accumulator: h_inc is added per
 * destination pixel and one source pixel is consumed per whole unit
 * (0x10000). x_pitch is the destination pixel step in bytes.
 *
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_rgb_24(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 _r, _g, _b, a=0, r=0, g=0, b=0;
	s32 pos;

	pos = 0x10000;
	while (dst_w) {
		while ( pos >= 0x10000L ) {
			r = *src++;
			g = *src++;
			b = *src++;
			a = *src++;
			pos -= 0x10000L;
			a = mul255(a, alpha);
		}
		if (a) {
			/*each background channel comes from its own byte, otherwise
			green and blue would be blended against the red background*/
			_r = dst[0];
			_g = dst[1];
			_b = dst[2];
			dst[0] = mul255(a, r - _r) + _r;
			dst[1] = mul255(a, g - _g) + _g;
			dst[2] = mul255(a, b - _b) + _b;
		}
		dst += x_pitch;
		pos += h_inc;
		dst_w--;
	}
}
1116 
1117 
1118 #if 0
1119 static void merge_row_yuv444(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
1120 {
1121 	u32 _r, _g, _b, a=0, r=0, g=0, b=0;
1122 	s32 pos;
1123 	u8 *dY, *dU, *dV;
1124 
1125 	dY = dst;
1126 	dU = dst + dst_height * dst_pitch;
1127 	dV = dU + dst_height * dst_pitch;
1128 
1129 	pos = 0x10000;
1130 	while (dst_w) {
1131 		while ( pos >= 0x10000L ) {
1132 			r = *src++;
1133 			g = *src++;
1134 			b = *src++;
1135 			a = *src++;
1136 			pos -= 0x10000L;
1137 			a = mul255(a, alpha);
1138 		}
1139 		if (a) {
1140 			_r = dY[0];
1141 			_g = dU[0];
1142 			_b = dV[0];
1143 			dY[0] = mul255(a, r - _r) + _r;
1144 			dU[1] = mul255(a, g - _g) + _g;
1145 			dV[2] = mul255(a, b - _b) + _b;
1146 		}
1147 		dY++;
1148 		dU++;
1149 		dV++;
1150 
1151 		pos += h_inc;
1152 		dst_w--;
1153 	}
1154 }
1155 #endif
1156 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a destination line whose pixels store B, G, R in their
 * first three bytes.
 *
 * The source alpha is modulated by the global alpha once per fetched source
 * pixel; destination pixels are skipped while the resulting alpha is 0.
 * Scaling uses a 16.16 fixed-point accumulator: h_inc is added per
 * destination pixel and one source pixel is consumed per whole unit
 * (0x10000). x_pitch is the destination pixel step in bytes.
 *
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_bgr_24(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 _r, _g, _b, a=0, r=0, g=0, b=0;
	s32 pos;

	pos = 0x10000;
	while (dst_w) {
		while ( pos >= 0x10000L ) {
			r = *src++;
			g = *src++;
			b = *src++;
			a = *src++;
			/*apply the global alpha here, once per source pixel: doing it
			per destination pixel would re-apply it each time the same
			source pixel is reused when h_inc is below 0x10000*/
			a = mul255(a, alpha);
			pos -= 0x10000L;
		}

		if (a) {
			_b = dst[0];
			_g = dst[1];
			_r = dst[2];
			dst[0] = mul255(a, b - _b) + _b;
			dst[1] = mul255(a, g - _g) + _g;
			dst[2] = mul255(a, r - _r) + _r;
		}
		dst += x_pitch;
		pos += h_inc;
		dst_w--;
	}
}
1186 
1187 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a 4-byte destination line stored as B, G, R, X.
 * The fourth destination byte is forced to 0xFF on every blended pixel.
 *
 * The source alpha is modulated by the global alpha once per fetched pixel;
 * destination pixels are skipped while the resulting alpha is 0.
 * Scaling uses a 16.16 fixed-point accumulator (h_inc per destination pixel,
 * one source pixel per 0x10000). x_pitch is the destination step in bytes.
 *
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_bgrx(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 sr=0, sg=0, sb=0, sa=0;
	u32 dr, dg, db;
	s32 frac = 0x10000;

	for (; dst_w; dst_w--, dst += x_pitch, frac += h_inc) {
		while (frac >= 0x10000L) {
			sr = src[0];
			sg = src[1];
			sb = src[2];
			sa = src[3];
			src += 4;
			sa = mul255(sa, alpha);
			frac -= 0x10000L;
		}
		if (!sa) continue;

		db = dst[0];
		dg = dst[1];
		dr = dst[2];

		dr = mul255(sa, sr - dr) + dr;
		dg = mul255(sa, sg - dg) + dg;
		db = mul255(sa, sb - db) + db;

		dst[0] = db;
		dst[1] = dg;
		dst[2] = dr;
		dst[3] = 0xFF;
	}
}
1223 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a 4-byte destination line stored as R, G, B, X.
 * The fourth destination byte is forced to 0xFF on every blended pixel.
 *
 * The source alpha is modulated by the global alpha once per fetched pixel;
 * destination pixels are skipped while the resulting alpha is 0.
 * Scaling uses a 16.16 fixed-point accumulator (h_inc per destination pixel,
 * one source pixel per 0x10000). x_pitch is the destination step in bytes.
 *
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_rgbx(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 sr=0, sg=0, sb=0, sa=0;
	u32 dr, dg, db;
	s32 frac = 0x10000;

	for (; dst_w; dst_w--, dst += x_pitch, frac += h_inc) {
		while (frac >= 0x10000L) {
			sr = src[0];
			sg = src[1];
			sb = src[2];
			sa = src[3];
			src += 4;
			sa = mul255(sa, alpha);
			frac -= 0x10000L;
		}
		if (!sa) continue;

		dr = dst[0];
		dg = dst[1];
		db = dst[2];
		dr = mul255(sa, sr - dr) + dr;
		dg = mul255(sa, sg - dg) + dg;
		db = mul255(sa, sb - db) + db;
		dst[0] = dr;
		dst[1] = dg;
		dst[2] = db;
		dst[3] = 0xFF;
	}
}
1257 
1258 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a 4-byte destination line stored as B, G, R, A.
 *
 * When the destination alpha byte is 0 the source pixel (with its modulated
 * alpha) simply replaces the destination; otherwise colours are blended and
 * the stored alpha becomes mul255(a, a) + mul255(0xFF-a, 0xFF).
 * NOTE(review): the destination alpha value itself does not enter that
 * formula - confirm this is intended.
 *
 * Scaling uses a 16.16 fixed-point accumulator (h_inc per destination pixel,
 * one source pixel per 0x10000). x_pitch is the destination step in bytes.
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_bgra(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 sr=0, sg=0, sb=0, sa=0;
	u32 dr, dg, db, da;
	s32 frac = 0x10000;

	for (; dst_w; dst_w--, dst += x_pitch, frac += h_inc) {
		while (frac >= 0x10000L) {
			sr = src[0];
			sg = src[1];
			sb = src[2];
			sa = src[3];
			src += 4;
			frac -= 0x10000L;
			sa = mul255(sa, alpha);
		}
		if (!sa) continue;

		if (dst[3]) {
			db = dst[0];
			dg = dst[1];
			dr = dst[2];
			da = mul255(sa, sa) + mul255(0xFF-sa, 0xFF);
			dr = mul255(sa, sr - dr) + dr;
			dg = mul255(sa, sg - dg) + dg;
			db = mul255(sa, sb - db) + db;
			dst[0] = db;
			dst[1] = dg;
			dst[2] = dr;
			dst[3] = da;
		} else {
			/*fully transparent background: plain copy*/
			dst[0] = sb;
			dst[1] = sg;
			dst[2] = sr;
			dst[3] = sa;
		}
	}
}
1300 
1301 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a 4-byte destination line stored as A, R, G, B.
 *
 * When the destination alpha byte is 0 the source pixel (with its modulated
 * alpha) replaces the destination; otherwise colours are blended and the
 * stored alpha becomes mul255(a, a) + mul255(0xFF-a, 0xFF).
 * NOTE(review): the destination alpha value itself does not enter that
 * formula - confirm this is intended.
 *
 * Scaling uses a 16.16 fixed-point accumulator: h_inc is added per
 * destination pixel and one source pixel is consumed per whole unit
 * (0x10000). x_pitch is the destination pixel step in bytes.
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_argb(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 _a, _r, _g, _b, a=0, r=0, g=0, b=0;
	s32 pos;

	pos = 0x10000;
	while (dst_w) {
		while ( pos >= 0x10000L ) {
			r = *src++;
			g = *src++;
			b = *src++;
			a = *src++;
			pos -= 0x10000L;
			a = mul255(a, alpha);
		}

		if (a) {
			_r = dst[1];
			_g = dst[2];
			_b = dst[3];
			if (dst[0]) {
				_a = mul255(a, a) + mul255(0xFF-a, 0xFF);
				_r = mul255(a, r - _r) + _r;
				_g = mul255(a, g - _g) + _g;
				_b = mul255(a, b - _b) + _b;
				dst[0] = _a;
				dst[1] = _r;
				dst[2] = _g;
				dst[3] = _b;
			} else {
				/*plain copy must use the same A,R,G,B byte layout as the
				blended branch, otherwise red and blue get swapped*/
				dst[0] = a;
				dst[1] = r;
				dst[2] = g;
				dst[3] = b;
			}
		}
		dst += x_pitch;
		pos += h_inc;
		dst_w--;
	}
}
1343 
/*
 * Blends one horizontally scaled line of 4-byte source pixels (read as
 * r, g, b, a) over a 4-byte destination line stored as R, G, B, A.
 *
 * When the destination alpha byte is 0 the source pixel (with its modulated
 * alpha) replaces the destination; otherwise colours are blended and the
 * stored alpha becomes mul255(a, a) + mul255(0xFF-a, 0xFF).
 * NOTE(review): the destination alpha value itself does not enter that
 * formula - confirm this is intended.
 *
 * Scaling uses a 16.16 fixed-point accumulator (h_inc per destination pixel,
 * one source pixel per 0x10000). x_pitch is the destination step in bytes.
 * src_w, dst_pitch and dst_height are not used.
 */
static void merge_row_rgba(u8 *src, u32 src_w, u8 *dst, u32 dst_w, s32 h_inc, s32 x_pitch, u8 alpha, u32 dst_pitch, u32 dst_height)
{
	u32 sr=0, sg=0, sb=0, sa=0;
	u32 dr, dg, db, da;
	s32 frac = 0x10000;

	for (; dst_w; dst_w--, dst += x_pitch, frac += h_inc) {
		while (frac >= 0x10000L) {
			sr = src[0];
			sg = src[1];
			sb = src[2];
			sa = src[3];
			src += 4;
			frac -= 0x10000L;
			sa = mul255(sa, alpha);
		}
		if (!sa) continue;

		if (dst[3]) {
			dr = dst[0];
			dg = dst[1];
			db = dst[2];
			da = mul255(sa, sa) + mul255(0xFF-sa, 0xFF);
			dr = mul255(sa, sr - dr) + dr;
			dg = mul255(sa, sg - dg) + dg;
			db = mul255(sa, sb - db) + db;
			dst[0] = dr;
			dst[1] = dg;
			dst[2] = db;
			dst[3] = da;
		} else {
			/*fully transparent background: plain copy*/
			dst[0] = sr;
			dst[1] = sg;
			dst[2] = sb;
			dst[3] = sa;
		}
	}
}
1384 
1385 
/*
 * Expands width pixels of an 8-bit grey source row into 4-byte pixels:
 * the grey value is replicated in the first three bytes and the fourth byte
 * is set to 0xFF. Reading starts x_offset bytes into row y_offset
 * (rows are y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_grey(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset + y_offset*y_pitch);
	u8 *out = dst_bits;

	while (width--) {
		const u8 grey = *in++;
		out[0] = grey;
		out[1] = grey;
		out[2] = grey;
		out[3] = 0xFF;
		out += 4;
	}
}
1396 
/*
 * Expands width pixels of a 2-byte-per-pixel source row into 4-byte pixels:
 * the first source byte of each pixel is replicated in the first three
 * output bytes, the second source byte goes to the fourth output byte.
 * Reading starts 2*x_offset bytes into row y_offset (rows are y_pitch bytes
 * apart). height and dst_yuv are not used.
 */
static void load_line_alpha_grey(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*2 + y_offset*y_pitch);
	u8 *out = dst_bits;

	while (width--) {
		const u8 first = in[0];
		const u8 second = in[1];
		in += 2;
		out[0] = first;
		out[1] = first;
		out[2] = first;
		out[3] = second;
		out += 4;
	}
}
1407 
/*
 * Expands width pixels of a 2-byte-per-pixel source row into 4-byte pixels:
 * the first source byte of each pixel goes to the fourth output byte, the
 * second source byte is replicated in the first three output bytes.
 * Reading starts 2*x_offset bytes into row y_offset (rows are y_pitch bytes
 * apart). height and dst_yuv are not used.
 */
static void load_line_grey_alpha(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*2 + y_offset*y_pitch);
	u8 *out = dst_bits;

	while (width--) {
		const u8 first = in[0];
		const u8 second = in[1];
		in += 2;
		out[3] = first;
		out[0] = second;
		out[1] = second;
		out[2] = second;
		out += 4;
	}
}
1418 
/*
 * Expands width pixels of a 16-bit 5-5-5 source row into 4-byte pixels:
 * the three channels are widened to 8 bits with colmask() and the fourth
 * byte is set to 0xFF. Reading starts at pixel x_offset of row y_offset
 * (rows are y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_rgb_555(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	u32 i;
	/*source pixels are read as u16, so the horizontal offset is 2 bytes
	per pixel (this also keeps the u16 reads on an even byte offset)*/
	src_bits += x_offset*2 + y_offset*y_pitch;
	for (i=0; i<width; i++) {
		u16 c = *((u16*)src_bits + i);
		dst_bits[0] = colmask(c >> (10 - 3), 3);
		dst_bits[1] = colmask(c >> (5 - 3), 3);
		dst_bits[2] = colmask(c << 3, 3);
		dst_bits[3] = 0xFF;
		dst_bits+=4;
	}
}
1432 
/*
 * Expands width pixels of a 16-bit 5-6-5 source row into 4-byte pixels:
 * the three channels are widened to 8 bits with colmask() and the fourth
 * byte is set to 0xFF. Reading starts at pixel x_offset of row y_offset
 * (rows are y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_rgb_565(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	u32 i;
	/*source pixels are read as u16, so the horizontal offset is 2 bytes
	per pixel (this also keeps the u16 reads on an even byte offset)*/
	src_bits += x_offset*2 + y_offset*y_pitch;
	for (i=0; i<width; i++) {
		u16 c = *((u16*)src_bits + i);
		dst_bits[0] = colmask(c >> (11 - 3), 3);
		dst_bits[1] = colmask(c >> (5 - 2), 2);
		dst_bits[2] = colmask(c << 3, 3);
		dst_bits[3] = 0xFF;
		dst_bits+=4;
	}
}
1446 
/*
 * Expands width pixels of a 3-byte-per-pixel source row into 4-byte pixels,
 * keeping the source byte order and setting the fourth byte to 0xFF.
 * Reading starts 3*x_offset bytes into row y_offset (rows are y_pitch bytes
 * apart). height and dst_yuv are not used.
 */
static void load_line_rgb_24(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*3 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 3, out += 4) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = 0xFF;
	}
}
1459 
/*
 * Expands width pixels of a 3-byte-per-pixel source row into 4-byte pixels,
 * reversing the order of the three source bytes and setting the fourth byte
 * to 0xFF. Reading starts 3*x_offset bytes into row y_offset (rows are
 * y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_bgr_24(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*3 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 3, out += 4) {
		out[2] = in[0];
		out[1] = in[1];
		out[0] = in[2];
		out[3] = 0xFF;
	}
}
1472 
/*
 * Copies width 4-byte pixels of a source row unchanged into dst_bits.
 * Reading starts 4*x_offset bytes into row y_offset (rows are y_pitch bytes
 * apart). height and dst_yuv are not used.
 */
static void load_line_rgb_32(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		out[3] = in[3];
	}
}
/*
 * Expands width 4-byte source pixels into 4-byte pixels: the first source
 * byte is ignored, the next three are copied in order and the fourth output
 * byte is set to 0xFF. Reading starts 4*x_offset bytes into row y_offset
 * (rows are y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_xrgb(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		/*in[0] is the padding byte*/
		out[0] = in[1];
		out[1] = in[2];
		out[2] = in[3];
		out[3] = 0xFF;
	}
}
/*
 * Expands width 4-byte source pixels into 4-byte pixels: the first three
 * source bytes are stored in reverse order, the fourth source byte is
 * ignored and the fourth output byte is set to 0xFF. Reading starts
 * 4*x_offset bytes into row y_offset (rows are y_pitch bytes apart).
 * height and dst_yuv are not used.
 */
static void load_line_bgrx(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		out[2] = in[0];
		out[1] = in[1];
		out[0] = in[2];
		/*in[3] is the padding byte*/
		out[3] = 0xFF;
	}
}
1511 
/*
 * Expands width 4-byte source pixels into 4-byte pixels: the first three
 * source bytes are copied in order, the fourth source byte is ignored and
 * the fourth output byte is set to 0xFF. Reading starts 4*x_offset bytes
 * into row y_offset (rows are y_pitch bytes apart).
 * height and dst_yuv are not used.
 */
static void load_line_rgbd(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		/*in[3] is not forwarded*/
		out[3] = 0xFF;
	}
}
1525 
/*
 * Expands width 4-byte source pixels into 4-byte pixels: the first three
 * source bytes are copied in order and the fourth output byte is 255 when
 * bit 0x80 of the fourth source byte is set, 0 otherwise. Reading starts
 * 4*x_offset bytes into row y_offset (rows are y_pitch bytes apart).
 * height and dst_yuv are not used.
 */
static void load_line_rgbds(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		out[0] = in[0];
		out[1] = in[1];
		out[2] = in[2];
		if (in[3] & 0x80) {
			out[3] = 255;
		} else {
			out[3] = 0;
		}
	}
}
1538 
/*
 * Expands width 4-byte source pixels into 4-byte pixels: the first three
 * source bytes are stored in reverse order and the fourth byte is copied
 * as is. Reading starts 4*x_offset bytes into row y_offset (rows are
 * y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_bgra(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		out[2] = in[0];
		out[1] = in[1];
		out[0] = in[2];
		out[3] = in[3];
	}
}
1551 
/*
 * Expands width 4-byte source pixels into 4-byte pixels: the first source
 * byte moves to the fourth output byte and the remaining three source bytes
 * become output bytes 0, 1 and 2. Reading starts 4*x_offset bytes into row
 * y_offset (rows are y_pitch bytes apart). height and dst_yuv are not used.
 */
static void load_line_argb(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	const u8 *in = src_bits + (x_offset*4 + y_offset*y_pitch);
	u8 *out = dst_bits;

	for (; width; width--, in += 4, out += 4) {
		out[3] = in[0];
		out[0] = in[1];
		out[1] = in[2];
		out[2] = in[3];
	}
}
/*
 * Locates the Y, U and V samples for pixel x_offset of row y_offset in a
 * planar buffer and hands them to yuv_load_lines_planar(), with a
 * destination stride of 4*width bytes and a chroma stride of y_pitch/2.
 *
 * When pU is NULL, both chroma planes are derived from src_bits: U starts
 * y_pitch*height bytes in, V 5*y_pitch*height/4 bytes in. Otherwise the
 * caller-provided pU and pV are used.
 */
static void load_line_yv12(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, Bool dst_yuv)
{
	const u32 luma_off = x_offset + y_offset*y_pitch;
	const u32 chroma_off = x_offset/2 + y_offset*y_pitch/4;
	u8 *plane_y = (u8 *)src_bits;
	u8 *plane_u = pU;
	u8 *plane_v = pV;

	if (!plane_u) {
		plane_u = (u8 *)src_bits + y_pitch*height;
		plane_v = (u8 *)src_bits + 5*y_pitch*height/4;
	}

	yuv_load_lines_planar((unsigned char*)dst_bits, 4*width, plane_y + luma_off, plane_u + chroma_off, plane_v + chroma_off, y_pitch, y_pitch/2, width, dst_yuv);
}
/*
 * Locates the Y, U and V samples for pixel x_offset of row y_offset in a
 * planar buffer and hands them to yuv422_load_lines_planar(), with a
 * destination stride of 4*width bytes and a chroma stride of y_pitch/2.
 *
 * When pU is NULL, both chroma planes are derived from src_bits: U starts
 * y_pitch*height bytes in, V 3*y_pitch*height/2 bytes in. Otherwise the
 * caller-provided pU and pV are used.
 */
static void load_line_yuv422(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, Bool dst_yuv)
{
	const u32 luma_off = x_offset + y_offset*y_pitch;
	const u32 chroma_off = x_offset / 2 + y_offset*y_pitch / 2;
	u8 *plane_y = (u8 *)src_bits;
	u8 *plane_u = pU;
	u8 *plane_v = pV;

	if (!plane_u) {
		plane_u = (u8 *)src_bits + y_pitch*height;
		plane_v = (u8 *)src_bits + 3 * y_pitch*height / 2;
	}

	yuv422_load_lines_planar((unsigned char*)dst_bits, 4 * width, plane_y + luma_off, plane_u + chroma_off, plane_v + chroma_off, y_pitch, y_pitch / 2, width, dst_yuv);
}
/*
 * Locates the Y, U and V samples for pixel x_offset of row y_offset in a
 * planar buffer and hands them to yuv444_load_lines_planar(), with a
 * destination stride of 4*width bytes and a chroma stride of y_pitch.
 *
 * When pU is NULL, both chroma planes are derived from src_bits: U starts
 * y_pitch*height bytes in, V 2*y_pitch*height bytes in. Otherwise the
 * caller-provided pU and pV are used.
 */
static void load_line_yuv444(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, Bool dst_yuv)
{
	/*all three planes share the same geometry, hence the same offset*/
	const u32 sample_off = x_offset + y_offset*y_pitch;
	u8 *plane_y = (u8 *)src_bits;
	u8 *plane_u = pU;
	u8 *plane_v = pV;

	if (!plane_u) {
		plane_u = (u8 *)src_bits + y_pitch*height;
		plane_v = (u8 *)src_bits + 2 * y_pitch*height;
	}

	yuv444_load_lines_planar((unsigned char*)dst_bits, 4 * width, plane_y + sample_off, plane_u + sample_off, plane_v + sample_off, y_pitch, y_pitch, width, dst_yuv);
}
/*
 * Locates the Y, U and V samples for pixel x_offset of row y_offset in a
 * planar buffer with 16-bit samples and hands them to
 * yuv_10_load_lines_planar(), with a destination stride of 4*width bytes
 * and a chroma stride of y_pitch/2.
 *
 * When pU is NULL, both chroma planes are derived from src_bits: U starts
 * y_pitch*height bytes in, V 5*y_pitch*height/4 bytes in. Otherwise the
 * caller-provided pU and pV are used.
 */
static void load_line_yv12_10(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, Bool dst_yuv)
{
	u8 *pY;
	pY = (u8 *)src_bits;
	if (!pU) {
		pU = (u8 *)src_bits + y_pitch*height;
		pV = (u8 *)src_bits + 5*y_pitch*height/4;
	}

	/*the horizontal offset is counted in 16-bit samples (same as
	load_line_yuv422_10 / load_line_yuv444_10), while y_pitch is in bytes*/
	pY = (u8 *)((u16 *)pY + x_offset) + y_offset*y_pitch;
	pU = (u8 *)((u16 *)pU + x_offset/2) + y_offset*y_pitch/4;
	pV = (u8 *)((u16 *)pV + x_offset/2) + y_offset*y_pitch/4;
	yuv_10_load_lines_planar((unsigned char*)dst_bits, 4*width, pY, pU, pV, y_pitch, y_pitch/2, width, dst_yuv);
}
/*
 * Locates the Y, U and V samples for pixel x_offset of row y_offset in a
 * planar buffer with 16-bit samples and hands them to
 * yuv422_10_load_lines_planar(), with a destination stride of 4*width bytes
 * and a chroma stride of y_pitch/2.
 *
 * The horizontal offset is applied in u16 units, the vertical one in bytes.
 * When pU is NULL, both chroma planes are derived from src_bits: U starts
 * y_pitch*height bytes in, V 3*y_pitch*height/2 bytes in.
 */
static void load_line_yuv422_10(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, Bool dst_yuv)
{
	u8 *line_y, *line_u, *line_v;

	if (!pU) {
		pU = (u8 *)src_bits + y_pitch*height;
		pV = (u8 *)src_bits + 3 * y_pitch*height / 2;
	}

	line_y = (u8 *)((u16 *)src_bits + x_offset) + y_offset*y_pitch;
	line_u = (u8 *)((u16 *)pU + x_offset / 2) + y_offset*y_pitch / 2;
	line_v = (u8 *)((u16 *)pV + x_offset / 2) + y_offset*y_pitch / 2;
	yuv422_10_load_lines_planar((unsigned char*)dst_bits, 4 * width, line_y, line_u, line_v, y_pitch, y_pitch / 2, width, dst_yuv);
}
/*
 * Locates the Y, U and V samples for pixel x_offset of row y_offset in a
 * planar buffer with 16-bit samples and hands them to
 * yuv444_10_load_lines_planar(), with a destination stride of 4*width bytes
 * and a chroma stride of y_pitch.
 *
 * The horizontal offset is applied in u16 units, the vertical one in bytes.
 * When pU is NULL, both chroma planes are derived from src_bits: U starts
 * y_pitch*height bytes in, V 2*y_pitch*height bytes in.
 */
static void load_line_yuv444_10(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, Bool dst_yuv)
{
	u8 *line_y, *line_u, *line_v;

	if (!pU) {
		pU = (u8 *)src_bits + y_pitch*height;
		pV = (u8 *)src_bits + 2 * y_pitch*height;
	}

	line_y = (u8 *)((u16 *)src_bits + x_offset) + y_offset*y_pitch;
	line_u = (u8 *)((u16 *)pU + x_offset) + y_offset*y_pitch;
	line_v = (u8 *)((u16 *)pV + x_offset) + y_offset*y_pitch;
	yuv444_10_load_lines_planar((unsigned char*)dst_bits, 4 * width, line_y, line_u, line_v, y_pitch, y_pitch, width, dst_yuv);
}
/*
 * Locates the Y, U, V and alpha samples for pixel x_offset of row y_offset
 * in a planar buffer and hands them to yuva_load_lines(), with a
 * destination stride of 4*width bytes and a chroma stride of y_pitch/2.
 *
 * When pU is NULL, the three extra planes are derived from src_bits: U
 * starts y_pitch*height bytes in, V 5*y_pitch*height/4 bytes in and alpha
 * 3*y_pitch*height/2 bytes in. Otherwise the caller-provided pU, pV and pA
 * are used. The alpha plane uses the same offset as the Y plane.
 */
static void load_line_yuva(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, u8 *pV, u8 *pA, Bool dst_yuv)
{
	const u32 full_off = x_offset + y_offset*y_pitch;
	const u32 chroma_off = x_offset/2 + y_offset*y_pitch/4;
	u8 *plane_y = (u8*)src_bits;

	if (!pU) {
		pU = (u8*)src_bits + y_pitch*height;
		pV = (u8*)src_bits + 5*y_pitch*height/4;
		pA = (u8*)src_bits + 3*y_pitch*height/2;
	}

	yuva_load_lines(dst_bits, 4*width, plane_y + full_off, pU + chroma_off, pV + chroma_off, pA + full_off, y_pitch, y_pitch/2, width, dst_yuv);
}
1675 
/*
 * Hands one packed source line to yuv_load_lines_packed(), with the first
 * Y byte at the line start, the U byte 1 byte further and the V byte
 * 3 bytes further. The destination stride is 4*width bytes.
 * height is not used.
 *
 * NOTE(review): x_offset is added as a byte count (1 byte per pixel) -
 * confirm against the caller that this is the intended unit.
 */
static void load_line_yuyv(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	u8 *first_y = (u8 *)src_bits + x_offset + y_offset*y_pitch;

	yuv_load_lines_packed((unsigned char*)dst_bits, 4*width, first_y, first_y + 1, first_y + 3, width, dst_yuv);
}
/*
 * Hands one packed source line to yuv_load_lines_packed(), with the U byte
 * at the line start, the first Y byte 1 byte further and the V byte
 * 2 bytes further. The destination stride is 4*width bytes.
 * height is not used.
 *
 * NOTE(review): x_offset is added as a byte count (1 byte per pixel) -
 * confirm against the caller that this is the intended unit.
 */
static void load_line_uyvy(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	u8 *first_u = (u8 *)src_bits + x_offset + y_offset*y_pitch;

	yuv_load_lines_packed((unsigned char*)dst_bits, 4*width, first_u + 1, first_u, first_u + 2, width, dst_yuv);
}
/*
 * Hands one packed source line to yuv_load_lines_packed(), with the first
 * Y byte at the line start, the V byte 1 byte further and the U byte
 * 3 bytes further. The destination stride is 4*width bytes.
 * height is not used.
 *
 * NOTE(review): x_offset is added as a byte count (1 byte per pixel) -
 * confirm against the caller that this is the intended unit.
 */
static void load_line_yvyu(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	u8 *first_y = (u8 *)src_bits + x_offset + y_offset*y_pitch;

	yuv_load_lines_packed((unsigned char*)dst_bits, 4*width, first_y, first_y + 3, first_y + 1, width, dst_yuv);
}
/*
 * Hands one packed source line to yuv_load_lines_packed(), with the V byte
 * at the line start, the first Y byte 1 byte further and the U byte
 * 2 bytes further. The destination stride is 4*width bytes.
 * height is not used.
 *
 * NOTE(review): x_offset is added as a byte count (1 byte per pixel) -
 * confirm against the caller that this is the intended unit.
 */
static void load_line_vyuy(u8 *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, Bool dst_yuv)
{
	u8 *first_v = (u8 *)src_bits + x_offset + y_offset*y_pitch;

	yuv_load_lines_packed((unsigned char*)dst_bits, 4*width, first_v + 1, first_v + 2, first_v, width, dst_yuv);
}
1708 
1709 
/*
 * Converts two consecutive luma rows plus their shared interleaved chroma
 * row into two rows of 4-byte pixels.
 *
 * dst/dststride:  first output row and byte distance to the second one.
 * y_src/y_stride: first luma row and byte distance to the second one.
 * u_src, v_src:   first U and V bytes; both are read every 2 bytes, one
 *                 pair for each 2x2 block of output pixels.
 * width:          number of pixels per row; width/2 pairs are processed.
 * dst_yuv:        when set, pixels are written as V, U, Y, 0xFF without
 *                 conversion; otherwise they go through the RGB_Y / R_V /
 *                 G_U / G_V / B_U tables and are written in the order
 *                 (y + r_v), (y - g_uv), (y + b_u), 0xFF.
 */
static void gf_yuv_load_lines_nv12_nv21(unsigned char *dst, s32 dststride, unsigned char *y_src, unsigned char *u_src, unsigned char *v_src, s32 y_stride, s32 width, Bool dst_yuv)
{
	unsigned char *row0 = dst;
	unsigned char *row1 = (unsigned char *) dst + dststride;
	unsigned char *luma0 = y_src;
	unsigned char *luma1 = (unsigned char *) y_src + y_stride;
	const u32 pairs = width / 2;
	u32 k;

	if (!dst_yuv) {
		for (k = 0; k < pairs; k++, row0 += 8, row1 += 8, luma0 += 2, luma1 += 2) {
			/*chroma contributions are shared by the four pixels of the block*/
			const s32 u = u_src[2*k];
			const s32 v = v_src[2*k];
			const s32 b_u = B_U[u];
			const s32 g_uv = G_U[u] + G_V[v];
			const s32 r_v = R_V[v];
			s32 rgb_y;

			/*top-left*/
			rgb_y = RGB_Y[luma0[0]];
			row0[0] = col_clip( (rgb_y + r_v) >> SCALEBITS_OUT);
			row0[1] = col_clip( (rgb_y - g_uv) >> SCALEBITS_OUT);
			row0[2] = col_clip( (rgb_y + b_u) >> SCALEBITS_OUT);
			row0[3] = 0xFF;

			/*top-right*/
			rgb_y = RGB_Y[luma0[1]];
			row0[4] = col_clip( (rgb_y + r_v) >> SCALEBITS_OUT);
			row0[5] = col_clip( (rgb_y - g_uv) >> SCALEBITS_OUT);
			row0[6] = col_clip( (rgb_y + b_u) >> SCALEBITS_OUT);
			row0[7] = 0xFF;

			/*bottom-left*/
			rgb_y = RGB_Y[luma1[0]];
			row1[0] = col_clip( (rgb_y + r_v) >> SCALEBITS_OUT);
			row1[1] = col_clip( (rgb_y - g_uv) >> SCALEBITS_OUT);
			row1[2] = col_clip( (rgb_y + b_u) >> SCALEBITS_OUT);
			row1[3] = 0xFF;

			/*bottom-right*/
			rgb_y = RGB_Y[luma1[1]];
			row1[4] = col_clip( (rgb_y + r_v) >> SCALEBITS_OUT);
			row1[5] = col_clip( (rgb_y - g_uv) >> SCALEBITS_OUT);
			row1[6] = col_clip( (rgb_y + b_u) >> SCALEBITS_OUT);
			row1[7] = 0xFF;
		}
		return;
	}

	/*YUV output: no conversion, only re-packing as V,U,Y,0xFF*/
	for (k = 0; k < pairs; k++, row0 += 8, row1 += 8, luma0 += 2, luma1 += 2) {
		const unsigned char v = v_src[2*k];
		const unsigned char u = u_src[2*k];

		row0[0] = v;
		row0[1] = u;
		row0[2] = luma0[0];
		row0[3] = 0xFF;

		row0[4] = v;
		row0[5] = u;
		row0[6] = luma0[1];
		row0[7] = 0xFF;

		row1[0] = v;
		row1[1] = u;
		row1[2] = luma1[0];
		row1[3] = 0xFF;

		row1[4] = v;
		row1[5] = u;
		row1[6] = luma1[1];
		row1[7] = 0xFF;
	}
}
1786 
/*
 * Locates the luma and interleaved chroma samples for pixel x_offset of row
 * y_offset and hands them to gf_yuv_load_lines_nv12_nv21(), passing the
 * chroma pointer as U and the byte after it as V.
 *
 * When pU is NULL the chroma plane is taken to start y_pitch*height bytes
 * after src_bits. Chroma rows are addressed with y_offset*y_pitch/2.
 */
static void load_line_nv12(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, Bool dst_yuv)
{
	u8 *luma = (u8*)src_bits + (x_offset + y_offset*y_pitch);
	u8 *chroma = pU ? pU : (u8*)src_bits + y_pitch*height;

	/*half vertical sampling*/
	chroma += x_offset + y_offset*y_pitch/2;
	gf_yuv_load_lines_nv12_nv21(dst_bits, 4*width, luma, chroma, chroma + 1, y_pitch, width, dst_yuv);
}
/*
 * Locates the luma and interleaved chroma samples for pixel x_offset of row
 * y_offset and hands them to gf_yuv_load_lines_nv12_nv21(), passing the
 * chroma pointer as V and the byte after it as U.
 *
 * When pU is NULL the chroma plane is taken to start y_pitch*height bytes
 * after src_bits. Chroma rows are addressed with y_offset*y_pitch/2.
 */
static void load_line_nv21(char *src_bits, u32 x_offset, u32 y_offset, u32 y_pitch, u32 width, u32 height, u8 *dst_bits, u8 *pU, Bool dst_yuv)
{
	u8 *luma = (u8*)src_bits + (x_offset + y_offset*y_pitch);
	u8 *chroma = pU ? pU : (u8*)src_bits + y_pitch*height;

	/*half vertical sampling*/
	chroma += x_offset + y_offset*y_pitch/2;
	gf_yuv_load_lines_nv12_nv21(dst_bits, 4*width, luma, chroma + 1, chroma, y_pitch, width, dst_yuv);
}
1809 
1810 static void gf_cmx_apply_argb(GF_ColorMatrix *_this, u8 *a_, u8 *r_, u8 *g_, u8 *b_);
1811 
1812 //#define COLORKEY_MPEG4_STRICT
1813 
format_is_yuv(u32 in_pf)1814 static Bool format_is_yuv(u32 in_pf)
1815 {
1816 	switch (in_pf) {
1817 	case GF_PIXEL_YUYV:
1818 	case GF_PIXEL_YVYU:
1819 	case GF_PIXEL_UYVY:
1820 	case GF_PIXEL_VYUY:
1821 	case GF_PIXEL_YUV:
1822 	case GF_PIXEL_YUV_10:
1823 	case GF_PIXEL_YUV422:
1824 	case GF_PIXEL_YUV422_10:
1825 	case GF_PIXEL_YUV444:
1826 	case GF_PIXEL_YUV444_10:
1827 		return GF_TRUE;
1828 		/*not supported yet*/
1829 	case GF_PIXEL_YUVA:
1830 	default:
1831 		return GF_FALSE;
1832 	}
1833 }
1834 
1835 GF_EXPORT
gf_stretch_bits(GF_VideoSurface * dst,GF_VideoSurface * src,GF_Window * dst_wnd,GF_Window * src_wnd,u8 alpha,Bool flip,GF_ColorKey * key,GF_ColorMatrix * cmat)1836 GF_Err gf_stretch_bits(GF_VideoSurface *dst, GF_VideoSurface *src, GF_Window *dst_wnd, GF_Window *src_wnd, u8 alpha, Bool flip, GF_ColorKey *key, GF_ColorMatrix *cmat)
1837 {
1838 	u8 *tmp, *rows;
1839 	u8 ka=0, kr=0, kg=0, kb=0, kl=0, kh=0;
1840 	s32 src_row;
1841 	u32 i, yuv_planar_type = 0;
1842 	Bool no_memcpy, dst_yuv = GF_FALSE;
1843 	Bool force_load_odd_yuv_lines = GF_FALSE;
1844 	Bool yuv_init = GF_FALSE;
1845 	Bool has_alpha = (alpha!=0xFF) ? GF_TRUE : GF_FALSE;
1846 	u32 dst_bpp, dst_w_size;
1847 	s32 pos_y, inc_y, inc_x, prev_row, x_off;
1848 	u32 src_w, src_h, dst_w, dst_h;
1849 	u8 *dst_bits = NULL, *dst_bits_prev = NULL, *dst_temp_bits = NULL;
1850 	s32 dst_x_pitch = dst->pitch_x;
1851 	copy_row_proto copy_row = NULL;
1852 	load_line_proto load_line = NULL;
1853 
1854 	if (cmat && (cmat->m[15] || cmat->m[16] || cmat->m[17] || (cmat->m[18]!=FIX_ONE) || cmat->m[19] )) has_alpha = GF_TRUE;
1855 	else if (key && (key->alpha<0xFF)) has_alpha = GF_TRUE;
1856 
1857 	//check if we have a dedicated copy/conv when no stretch nor blending (avoids line load while copying)
1858 	if ((alpha==0xFF) && !flip && !key && !cmat) {
1859 		Bool no_stretch = GF_FALSE;
1860 		Bool output_yuv = format_is_yuv(dst->pixel_format);
1861 		GF_Err e = GF_NOT_SUPPORTED;
1862 
1863 		if (!dst_wnd) no_stretch = GF_TRUE;
1864 		else if (src_wnd) {
1865 			if (!dst_wnd->x && !dst_wnd->y && (dst_wnd->w==src_wnd->w) && (dst_wnd->h==src_wnd->h))
1866 				no_stretch = GF_TRUE;
1867 		} else {
1868 			if ((dst_wnd->w==src->width) && (dst_wnd->h==src->height))
1869 				no_stretch = GF_TRUE;
1870 		}
1871 		if (no_stretch && output_yuv) {
1872 			//check YUV10->8, YUV->YUV
1873 			switch (src->pixel_format) {
1874 			case GF_PIXEL_NV12_10:
1875 				e = color_write_nv12_10_to_yuv(dst, src, src_wnd, GF_FALSE);
1876 				break;
1877 			case GF_PIXEL_NV21_10:
1878 				e = color_write_nv12_10_to_yuv(dst, src, src_wnd, GF_TRUE);
1879 				break;
1880 			case GF_PIXEL_YUV_10:
1881 				e = color_write_yv12_10_to_yuv(dst, src, src_wnd, GF_FALSE);
1882 				break;
1883 			case GF_PIXEL_YUV422_10:
1884 				if (dst->pixel_format == GF_PIXEL_YUV422)
1885 					e = color_write_yuv422_10_to_yuv422(dst, src, src_wnd, GF_FALSE);
1886 				else if (dst->pixel_format == GF_PIXEL_YUV)
1887 					e = color_write_yuv422_10_to_yuv(dst, src, src_wnd, GF_FALSE);
1888 				break;
1889 			case GF_PIXEL_YUV444_10:
1890 				if (dst->pixel_format == GF_PIXEL_YUV444)
1891 					e = color_write_yuv444_10_to_yuv444(dst, src, src_wnd, GF_FALSE);
1892 				else if (dst->pixel_format == GF_PIXEL_YUV)
1893 					e = color_write_yuv444_10_to_yuv(dst, src, src_wnd, GF_FALSE);
1894 				break;
1895 			case GF_PIXEL_YUV:
1896 				e = color_write_yuv420_to_yuv(dst, src, src_wnd, GF_FALSE);
1897 				break;
1898 			case GF_PIXEL_YUV422:
1899 				e = color_write_yuv422_to_yuv(dst, src, src_wnd, GF_FALSE);
1900 				break;
1901 			case GF_PIXEL_YUV444:
1902 				e = color_write_yuv444_to_yuv(dst, src, src_wnd, GF_FALSE);
1903 				break;
1904 			case GF_PIXEL_YUYV:
1905 			case GF_PIXEL_YVYU:
1906 			case GF_PIXEL_UYVY:
1907 			case GF_PIXEL_VYUY:
1908 				e = color_write_yvyu_to_yuv(dst, src, src_wnd, GF_FALSE);
1909 				break;
1910 			}
1911 		}
1912 		else if (no_stretch && !output_yuv) {
1913 			//check rgb->rgb copy
1914 			switch (dst->pixel_format) {
1915 			case GF_PIXEL_RGB:
1916 			case GF_PIXEL_RGBS:
1917 			case GF_PIXEL_BGR:
1918 				e = color_write_rgb_to_24(dst, src, src_wnd);
1919 				break;
1920 			case GF_PIXEL_RGBX:
1921 			case GF_PIXEL_XRGB:
1922 			case GF_PIXEL_RGBD:
1923 			case GF_PIXEL_RGBDS:
1924 			case GF_PIXEL_BGRX:
1925 			case GF_PIXEL_XBGR:
1926 				e = color_write_rgb_to_32(dst, src, src_wnd);
1927 				break;
1928 			default:
1929 				break;
1930 			}
1931 		}
1932 		if (e == GF_OK) return GF_OK;
1933 	}
1934 
1935 
1936 	switch (src->pixel_format) {
1937 	case GF_PIXEL_GREYSCALE:
1938 		load_line = load_line_grey;
1939 		break;
1940 	case GF_PIXEL_ALPHAGREY:
1941 		load_line = load_line_alpha_grey;
1942 		has_alpha = GF_TRUE;
1943 		break;
1944 	case GF_PIXEL_GREYALPHA:
1945 		load_line = load_line_grey_alpha;
1946 		has_alpha = GF_TRUE;
1947 		break;
1948 	case GF_PIXEL_RGB_555:
1949 		load_line = load_line_rgb_555;
1950 		break;
1951 	case GF_PIXEL_RGB_565:
1952 		load_line = load_line_rgb_565;
1953 		break;
1954 	case GF_PIXEL_RGB:
1955 	case GF_PIXEL_RGBS:
1956 		load_line = load_line_rgb_24;
1957 		break;
1958 	case GF_PIXEL_BGR:
1959 		load_line = load_line_bgr_24;
1960 		break;
1961 	case GF_PIXEL_ARGB:
1962 		has_alpha = GF_TRUE;
1963 		load_line = load_line_argb;
1964 		break;
1965 	case GF_PIXEL_BGRA:
1966 		has_alpha = GF_TRUE;
1967 		load_line = load_line_bgra;
1968 		break;
1969 	case GF_PIXEL_RGBA:
1970 	case GF_PIXEL_RGBAS:
1971 		has_alpha = GF_TRUE;
1972 	case GF_PIXEL_RGBX:
1973 		load_line = load_line_rgb_32;
1974 		break;
1975 	case GF_PIXEL_XRGB:
1976 		load_line = load_line_xrgb;
1977 		break;
1978 	case GF_PIXEL_BGRX:
1979 		load_line = load_line_bgrx;
1980 		break;
1981 	case GF_PIXEL_RGBDS:
1982 		load_line = load_line_rgbds;
1983 		has_alpha = GF_TRUE;
1984 		break;
1985 	case GF_PIXEL_RGBD:
1986 		load_line = load_line_rgbd;
1987 		break;
1988 	case GF_PIXEL_YUV:
1989 		yuv2rgb_init();
1990 		yuv_planar_type = 1;
1991 		break;
1992 	case GF_PIXEL_YUV422:
1993 		yuv2rgb_init();
1994 		yuv_planar_type = 4;
1995 		break;
1996 	case GF_PIXEL_YUV444:
1997 		yuv2rgb_init();
1998 		yuv_planar_type = 5;
1999 		break;
2000 
2001 	case GF_PIXEL_YUV_10:
2002 		yuv2rgb_init();
2003 		yuv_planar_type = 3;
2004 		break;
2005 	case GF_PIXEL_YUV422_10:
2006 		yuv2rgb_init();
2007 		yuv_planar_type = 6;
2008 		break;
2009 	case GF_PIXEL_YUV444_10:
2010 		yuv2rgb_init();
2011 		yuv_planar_type = 7;
2012 		break;
2013 	case GF_PIXEL_NV21:
2014 		yuv2rgb_init();
2015 		yuv_planar_type = 8;
2016 		break;
2017 	case GF_PIXEL_NV12:
2018 		yuv2rgb_init();
2019 		yuv_planar_type = 9;
2020 		break;
2021 	case GF_PIXEL_YUVA:
2022 		has_alpha = GF_TRUE;
2023 	case GF_PIXEL_YUVD:
2024 		yuv_planar_type = 2;
2025 		yuv2rgb_init();
2026 		break;
2027 	case GF_PIXEL_YUYV:
2028 		yuv_planar_type = 0;
2029 		yuv2rgb_init();
2030 		load_line = load_line_yuyv;
2031 		break;
2032 	case GF_PIXEL_UYVY:
2033 		yuv_planar_type = 0;
2034 		yuv2rgb_init();
2035 		load_line = load_line_uyvy;
2036 		break;
2037 	case GF_PIXEL_YVYU:
2038 		yuv_planar_type = 0;
2039 		yuv2rgb_init();
2040 		load_line = load_line_yvyu;
2041 		break;
2042 	case GF_PIXEL_VYUY:
2043 		yuv_planar_type = 0;
2044 		yuv2rgb_init();
2045 		load_line = load_line_vyuy;
2046 		break;
2047 	default:
2048 		GF_LOG(GF_LOG_INFO, GF_LOG_CORE, ("Source pixel format %s not supported by gf_stretch_bits\n", gf_pixel_fmt_name(src->pixel_format) ));
2049 		return GF_NOT_SUPPORTED;
2050 	}
2051 
2052 	/*only RGB output supported*/
2053 	switch (dst->pixel_format) {
2054 	case GF_PIXEL_RGB_555:
2055 		dst_bpp = sizeof(unsigned char)*2;
2056 		copy_row = has_alpha ? merge_row_rgb_555 : copy_row_rgb_555;
2057 		break;
2058 	case GF_PIXEL_RGB_565:
2059 		dst_bpp = sizeof(unsigned char)*2;
2060 		copy_row = has_alpha ? merge_row_rgb_565 : copy_row_rgb_565;
2061 		break;
2062 	case GF_PIXEL_RGB:
2063 		dst_bpp = sizeof(unsigned char)*3;
2064 		copy_row = has_alpha ? merge_row_rgb_24 : copy_row_rgb_24;
2065 		break;
2066 	case GF_PIXEL_BGR:
2067 		dst_bpp = sizeof(unsigned char)*3;
2068 		copy_row = has_alpha ? merge_row_bgr_24 : copy_row_bgr_24;
2069 		break;
2070 	case GF_PIXEL_RGBX:
2071 		dst_bpp = sizeof(unsigned char)*4;
2072 		copy_row = has_alpha ? merge_row_bgrx : copy_row_bgrx;
2073 		break;
2074 	case GF_PIXEL_ARGB:
2075 		dst_bpp = sizeof(unsigned char)*4;
2076 		copy_row = has_alpha ? merge_row_argb : copy_row_argb;
2077 		break;
2078 	case GF_PIXEL_BGRA:
2079 		dst_bpp = sizeof(unsigned char)*4;
2080 		copy_row = has_alpha ? merge_row_bgra : copy_row_bgrx;
2081 		break;
2082 	case GF_PIXEL_RGBD:
2083 		dst_bpp = sizeof(unsigned char)*4;
2084 		copy_row = has_alpha ? merge_row_bgrx : copy_row_rgbd;
2085 		break;
2086 	case GF_PIXEL_RGBA:
2087 		dst_bpp = sizeof(unsigned char)*4;
2088 		copy_row = has_alpha ? merge_row_rgba : copy_row_rgbx;
2089 		break;
2090 	case GF_PIXEL_BGRX:
2091 		dst_bpp = sizeof(unsigned char)*4;
2092 		copy_row = has_alpha ? merge_row_rgbx : copy_row_rgbx;
2093 		break;
2094 #if 0
2095 	//yuv dest not yet supported
2096 	case GF_PIXEL_YUV444:
2097 		dst_bpp = sizeof(unsigned char)*3;
2098 		copy_row = has_alpha ? merge_row_yuv444 : copy_row_yuv444;
2099 		dst_yuv = GF_TRUE;
2100 		break;
2101 #endif
2102 	default:
2103 		GF_LOG(GF_LOG_INFO, GF_LOG_CORE, ("Destination pixel format %s not supported by gf_stretch_bits, patch welcome\n", gf_pixel_fmt_name(dst->pixel_format) ));
2104 		return GF_NOT_SUPPORTED;
2105 	}
2106 	/*x_pitch 0 means linear framebuffer*/
2107 	if (!dst_x_pitch) dst_x_pitch = dst_bpp;
2108 
2109 
2110 	src_w = src_wnd ? src_wnd->w : src->width;
2111 	src_h = src_wnd ? src_wnd->h : src->height;
2112 	dst_w = dst_wnd ? dst_wnd->w : dst->width;
2113 	dst_h = dst_wnd ? dst_wnd->h : dst->height;
2114 
2115 	if (yuv_planar_type && (src_w%2)) src_w++;
2116 
2117 	tmp = (u8 *) gf_malloc(sizeof(u8) * src_w * (yuv_planar_type ? 8 : 4) );
2118 	rows = tmp;
2119 
2120 	if ( (src_h / dst_h) * dst_h != src_h) force_load_odd_yuv_lines = GF_TRUE;
2121 
2122 	pos_y = 0x10000;
2123 	inc_y = (src_h << 16) / dst_h;
2124 	inc_x = (src_w << 16) / dst_w;
2125 	x_off = src_wnd ? src_wnd->x : 0;
2126 	src_row = src_wnd ? src_wnd->y : 0;
2127 
2128 	prev_row = -1;
2129 
2130 	dst_bits = (u8 *) dst->video_buffer;
2131 	if (dst_wnd) dst_bits += ((s32)dst_wnd->x) * dst_x_pitch + ((s32)dst_wnd->y) * dst->pitch_y;
2132 
2133 	dst_w_size = dst_bpp*dst_w;
2134 
2135 	if (key) {
2136 		ka = key->alpha;
2137 		kr = key->r;
2138 		kg = key->g;
2139 		kb = key->b;
2140 		kl = key->low;
2141 		kh = key->high;
2142 		if (kh==kl) kh++;
2143 	}
2144 
2145 	/*do NOT use memcpy if the target buffer is not in systems memory*/
2146 	no_memcpy = (has_alpha || dst->is_hardware_memory || (dst_bpp!=dst_x_pitch)) ? GF_TRUE : GF_FALSE;
2147 
2148 	while (dst_h) {
2149 		while ( pos_y >= 0x10000L ) {
2150 			src_row++;
2151 			pos_y -= 0x10000L;
2152 		}
2153 		/*new row, check if conversion is needed*/
2154 		if (prev_row != src_row) {
2155 			u32 the_row = src_row - 1;
2156 			if (yuv_planar_type) {
2157 				if (the_row % 2) {
2158 					if (!yuv_init || force_load_odd_yuv_lines) {
2159 						yuv_init = GF_TRUE;
2160 						the_row--;
2161 						if (flip) the_row = src->height - 2 - the_row;
2162 						if (yuv_planar_type == 1) {
2163 							load_line_yv12(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2164 						}
2165 						else if (yuv_planar_type == 4) {
2166 							load_line_yuv422(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2167 						}
2168 						else if (yuv_planar_type == 5) {
2169 							load_line_yuv444(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2170 						}
2171 						else if (yuv_planar_type == 3) {
2172 							load_line_yv12_10((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2173 						}
2174 						else if (yuv_planar_type == 6) {
2175 							load_line_yuv422_10((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2176 						}
2177 						else if (yuv_planar_type == 7) {
2178 							load_line_yuv444_10((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2179 						}
2180 						else if (yuv_planar_type == 8) {
2181 							load_line_nv21((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, dst_yuv);
2182 						}
2183 						else if (yuv_planar_type == 9) {
2184 							load_line_nv12((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, dst_yuv);
2185 						}
2186 						else {
2187 							load_line_yuva(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, (u8 *)src->a_ptr, dst_yuv);
2188 						}
2189 
2190 						if (cmat) {
2191 							for (i=0; i<2*src_w; i++) {
2192 								u32 idx = 4*i;
2193 								gf_cmx_apply_argb(cmat, (u8 *) &tmp[idx+3], (u8 *) &tmp[idx], (u8 *) &tmp[idx+1], (u8 *) &tmp[idx+2]);
2194 							}
2195 						}
2196 						if (key) {
2197 							for (i=0; i<2*src_w; i++) {
2198 								u32 idx = 4*i;
2199 								s32 thres, v;
2200 								v = tmp[idx]-kr;
2201 								thres = ABS(v);
2202 								v = tmp[idx+1]-kg;
2203 								thres += ABS(v);
2204 								v = tmp[idx+2]-kb;
2205 								thres += ABS(v);
2206 								thres/=3;
2207 #ifdef COLORKEY_MPEG4_STRICT
2208 								if (thres < kl) tmp[idx+3] = 0;
2209 								else if (thres <= kh) tmp[idx+3] = (thres-kl)*ka / (kh-kl);
2210 #else
2211 								if (thres < kh) tmp[idx+3] = 0;
2212 #endif
2213 								else tmp[idx+3] = ka;
2214 							}
2215 						}
2216 					}
2217 					rows = flip ? tmp : tmp + src_w * 4;
2218 				}
2219 				else {
2220 					if (flip) the_row = src->height - 2 - the_row;
2221 					if (yuv_planar_type == 1) {
2222 						load_line_yv12(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2223 					}
2224 					else if (yuv_planar_type == 4) {
2225 						load_line_yuv422(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2226 					}
2227 					else if (yuv_planar_type == 5) {
2228 						load_line_yuv444(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2229 					}
2230 					else if (yuv_planar_type == 3) {
2231 						load_line_yv12_10((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2232 					}
2233 					else if (yuv_planar_type == 6) {
2234 						load_line_yuv422_10((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2235 					}
2236 					else if (yuv_planar_type == 7) {
2237 						load_line_yuv444_10((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, dst_yuv);
2238 					}
2239 					else if (yuv_planar_type == 8) {
2240 						load_line_nv21((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, dst_yuv);
2241 					}
2242 					else if (yuv_planar_type == 9) {
2243 						load_line_nv12((char *)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, dst_yuv);
2244 					}
2245 					else {
2246 						load_line_yuva(src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, (u8 *)src->u_ptr, (u8 *)src->v_ptr, (u8 *)src->a_ptr, dst_yuv);
2247 					}
2248 					yuv_init = GF_TRUE;
2249 					rows = flip ? tmp + src_w * 4 : tmp;
2250 
2251 					if (cmat) {
2252 						for (i=0; i<2*src_w; i++) {
2253 							u32 idx = 4*i;
2254 							gf_cmx_apply_argb(cmat, &tmp[idx+3], &tmp[idx], &tmp[idx+1], &tmp[idx+2]);
2255 						}
2256 					}
2257 					if (key) {
2258 						for (i=0; i<2*src_w; i++) {
2259 							u32 idx = 4*i;
2260 							s32 thres, v;
2261 							v = tmp[idx]-kr;
2262 							thres = ABS(v);
2263 							v = tmp[idx+1]-kg;
2264 							thres += ABS(v);
2265 							v = tmp[idx+2]-kb;
2266 							thres += ABS(v);
2267 							thres/=3;
2268 #ifdef COLORKEY_MPEG4_STRICT
2269 							if (thres < kl) tmp[idx+3] = 0;
2270 							else if (thres <= kh) tmp[idx+3] = (thres-kl)*ka / (kh-kl);
2271 #else
2272 							if (thres < kh) tmp[idx+3] = 0;
2273 #endif
2274 							else tmp[idx+3] = ka;
2275 						}
2276 					}
2277 				}
2278 			} else {
2279 				if (flip) the_row = src->height-1 - the_row;
2280 				load_line((u8*)src->video_buffer, x_off, the_row, src->pitch_y, src_w, src->height, tmp, dst_yuv);
2281 				rows = tmp;
2282 				if (cmat) {
2283 					for (i=0; i<src_w; i++) {
2284 						u32 idx = 4*i;
2285 						gf_cmx_apply_argb(cmat, &tmp[idx+3], &tmp[idx], &tmp[idx+1], &tmp[idx+2]);
2286 					}
2287 				}
2288 				if (key) {
2289 					for (i=0; i<src_w; i++) {
2290 						u32 idx = 4*i;
2291 						s32 thres, v;
2292 						v = tmp[idx]-kr;
2293 						thres = ABS(v);
2294 						v = tmp[idx+1]-kg;
2295 						thres += ABS(v);
2296 						v = tmp[idx+2]-kb;
2297 						thres += ABS(v);
2298 						thres/=3;
2299 #ifdef COLORKEY_MPEG4_STRICT
2300 						if (thres < kl) tmp[idx+3] = 0;
2301 						else if (thres <= kh) tmp[idx+3] = (thres-kl)*ka / (kh-kl);
2302 #else
2303 						if (thres < kh) tmp[idx+3] = 0;
2304 #endif
2305 						else tmp[idx+3] = ka;
2306 					}
2307 				}
2308 			}
2309 			copy_row(rows, src_w, dst_bits, dst_w, inc_x, dst_x_pitch, alpha, dst->pitch_y, dst->height);
2310 		}
2311 		/*do NOT use memcpy if the target buffer is not in systems memory*/
2312 		else if (no_memcpy) {
2313 			copy_row(rows, src_w, dst_bits, dst_w, inc_x, dst_x_pitch, alpha, dst->pitch_y, dst->height);
2314 		} else if (dst_bits && dst_bits_prev) {
2315 			memcpy(dst_bits, dst_bits_prev, dst_w_size);
2316 		}
2317 
2318 		pos_y += inc_y;
2319 		prev_row = src_row;
2320 
2321 		dst_bits_prev = dst_bits;
2322 		dst_bits += dst->pitch_y;
2323 		dst_h--;
2324 	}
2325 	if (dst_temp_bits) gf_free(dst_temp_bits);
2326 	gf_free(tmp);
2327 	return GF_OK;
2328 }
2329 
2330 
2331 
2332 /*
2333 	COLOR MATRIX TOOLS
2334  */
2335 
2336 GF_EXPORT
gf_cmx_init(GF_ColorMatrix * _this)2337 void gf_cmx_init(GF_ColorMatrix *_this)
2338 {
2339 	if (!_this) return;
2340 	memset(_this->m, 0, sizeof(Fixed)*20);
2341 	_this->m[0] = _this->m[6] = _this->m[12] = _this->m[18] = FIX_ONE;
2342 	_this->identity = 1;
2343 }
2344 
2345 
/*recomputes the identity flag of the matrix by comparing its 20 coefficients
byte-wise against a freshly initialized matrix. _this is not checked against NULL.*/
static void gf_cmx_identity(GF_ColorMatrix *_this)
{
	GF_ColorMatrix ref;

	gf_cmx_init(&ref);
	if (memcmp(_this->m, ref.m, sizeof(Fixed)*20) == 0) {
		_this->identity = 1;
	} else {
		_this->identity = 0;
	}
}
2352 
2353 GF_EXPORT
gf_cmx_set(GF_ColorMatrix * _this,Fixed c1,Fixed c2,Fixed c3,Fixed c4,Fixed c5,Fixed c6,Fixed c7,Fixed c8,Fixed c9,Fixed c10,Fixed c11,Fixed c12,Fixed c13,Fixed c14,Fixed c15,Fixed c16,Fixed c17,Fixed c18,Fixed c19,Fixed c20)2354 void gf_cmx_set(GF_ColorMatrix *_this,
2355                 Fixed c1, Fixed c2, Fixed c3, Fixed c4, Fixed c5,
2356                 Fixed c6, Fixed c7, Fixed c8, Fixed c9, Fixed c10,
2357                 Fixed c11, Fixed c12, Fixed c13, Fixed c14, Fixed c15,
2358                 Fixed c16, Fixed c17, Fixed c18, Fixed c19, Fixed c20)
2359 {
2360 	if (!_this) return;
2361 	_this->m[0] = c1;
2362 	_this->m[1] = c2;
2363 	_this->m[2] = c3;
2364 	_this->m[3] = c4;
2365 	_this->m[4] = c5;
2366 	_this->m[5] = c6;
2367 	_this->m[6] = c7;
2368 	_this->m[7] = c8;
2369 	_this->m[8] = c9;
2370 	_this->m[9] = c10;
2371 	_this->m[10] = c11;
2372 	_this->m[11] = c12;
2373 	_this->m[12] = c13;
2374 	_this->m[13] = c14;
2375 	_this->m[14] = c15;
2376 	_this->m[15] = c16;
2377 	_this->m[16] = c17;
2378 	_this->m[17] = c18;
2379 	_this->m[18] = c19;
2380 	_this->m[19] = c20;
2381 	gf_cmx_identity(_this);
2382 }
2383 
2384 GF_EXPORT
gf_cmx_copy(GF_ColorMatrix * _this,GF_ColorMatrix * from)2385 void gf_cmx_copy(GF_ColorMatrix *_this, GF_ColorMatrix *from)
2386 {
2387 	if (!_this || !from) return;
2388 	memcpy(_this->m, from->m, sizeof(Fixed)*20);
2389 	gf_cmx_identity(_this);
2390 }
2391 
2392 
2393 GF_EXPORT
gf_cmx_multiply(GF_ColorMatrix * _this,GF_ColorMatrix * w)2394 void gf_cmx_multiply(GF_ColorMatrix *_this, GF_ColorMatrix *w)
2395 {
2396 	Fixed res[20];
2397 	if (!_this || !w || w->identity) return;
2398 	if (_this->identity) {
2399 		gf_cmx_copy(_this, w);
2400 		return;
2401 	}
2402 
2403 	res[0] = gf_mulfix(_this->m[0], w->m[0]) + gf_mulfix(_this->m[1], w->m[5]) + gf_mulfix(_this->m[2], w->m[10]) + gf_mulfix(_this->m[3], w->m[15]);
2404 	res[1] = gf_mulfix(_this->m[0], w->m[1]) + gf_mulfix(_this->m[1], w->m[6]) + gf_mulfix(_this->m[2], w->m[11]) + gf_mulfix(_this->m[3], w->m[16]);
2405 	res[2] = gf_mulfix(_this->m[0], w->m[2]) + gf_mulfix(_this->m[1], w->m[7]) + gf_mulfix(_this->m[2], w->m[12]) + gf_mulfix(_this->m[3], w->m[17]);
2406 	res[3] = gf_mulfix(_this->m[0], w->m[3]) + gf_mulfix(_this->m[1], w->m[8]) + gf_mulfix(_this->m[2], w->m[13]) + gf_mulfix(_this->m[3], w->m[18]);
2407 	res[4] = gf_mulfix(_this->m[0], w->m[4]) + gf_mulfix(_this->m[1], w->m[9]) + gf_mulfix(_this->m[2], w->m[14]) + gf_mulfix(_this->m[3], w->m[19]) + _this->m[4];
2408 
2409 	res[5] = gf_mulfix(_this->m[5], w->m[0]) + gf_mulfix(_this->m[6], w->m[5]) + gf_mulfix(_this->m[7], w->m[10]) + gf_mulfix(_this->m[8], w->m[15]);
2410 	res[6] = gf_mulfix(_this->m[5], w->m[1]) + gf_mulfix(_this->m[6], w->m[6]) + gf_mulfix(_this->m[7], w->m[11]) + gf_mulfix(_this->m[8], w->m[16]);
2411 	res[7] = gf_mulfix(_this->m[5], w->m[2]) + gf_mulfix(_this->m[6], w->m[7]) + gf_mulfix(_this->m[7], w->m[12]) + gf_mulfix(_this->m[8], w->m[17]);
2412 	res[8] = gf_mulfix(_this->m[5], w->m[3]) + gf_mulfix(_this->m[6], w->m[8]) + gf_mulfix(_this->m[7], w->m[13]) + gf_mulfix(_this->m[8], w->m[18]);
2413 	res[9] = gf_mulfix(_this->m[5], w->m[4]) + gf_mulfix(_this->m[6], w->m[9]) + gf_mulfix(_this->m[7], w->m[14]) + gf_mulfix(_this->m[8], w->m[19]) + _this->m[9];
2414 
2415 	res[10] = gf_mulfix(_this->m[10], w->m[0]) + gf_mulfix(_this->m[11], w->m[5]) + gf_mulfix(_this->m[12], w->m[10]) + gf_mulfix(_this->m[13], w->m[15]);
2416 	res[11] = gf_mulfix(_this->m[10], w->m[1]) + gf_mulfix(_this->m[11], w->m[6]) + gf_mulfix(_this->m[12], w->m[11]) + gf_mulfix(_this->m[13], w->m[16]);
2417 	res[12] = gf_mulfix(_this->m[10], w->m[2]) + gf_mulfix(_this->m[11], w->m[7]) + gf_mulfix(_this->m[12], w->m[12]) + gf_mulfix(_this->m[13], w->m[17]);
2418 	res[13] = gf_mulfix(_this->m[10], w->m[3]) + gf_mulfix(_this->m[11], w->m[8]) + gf_mulfix(_this->m[12], w->m[13]) + gf_mulfix(_this->m[13], w->m[18]);
2419 	res[14] = gf_mulfix(_this->m[10], w->m[4]) + gf_mulfix(_this->m[11], w->m[9]) + gf_mulfix(_this->m[12], w->m[14]) + gf_mulfix(_this->m[13], w->m[19]) + _this->m[14];
2420 
2421 	res[15] = gf_mulfix(_this->m[15], w->m[0]) + gf_mulfix(_this->m[16], w->m[5]) + gf_mulfix(_this->m[17], w->m[10]) + gf_mulfix(_this->m[18], w->m[15]);
2422 	res[16] = gf_mulfix(_this->m[15], w->m[1]) + gf_mulfix(_this->m[16], w->m[6]) + gf_mulfix(_this->m[17], w->m[11]) + gf_mulfix(_this->m[18], w->m[16]);
2423 	res[17] = gf_mulfix(_this->m[15], w->m[2]) + gf_mulfix(_this->m[16], w->m[7]) + gf_mulfix(_this->m[17], w->m[12]) + gf_mulfix(_this->m[18], w->m[17]);
2424 	res[18] = gf_mulfix(_this->m[15], w->m[3]) + gf_mulfix(_this->m[16], w->m[8]) + gf_mulfix(_this->m[17], w->m[13]) + gf_mulfix(_this->m[18], w->m[18]);
2425 	res[19] = gf_mulfix(_this->m[15], w->m[4]) + gf_mulfix(_this->m[16], w->m[9]) + gf_mulfix(_this->m[17], w->m[14]) + gf_mulfix(_this->m[18], w->m[19]) + _this->m[19];
2426 	memcpy(_this->m, res, sizeof(Fixed)*20);
2427 	gf_cmx_identity(_this);
2428 }
2429 
/*clamps the lvalue val in place to the range [0, FIX_ONE]; val is evaluated several times*/
#define CLIP_COMP(val)	{ \
		if ((val) < 0) { (val) = 0; } \
		else if ((val) > FIX_ONE) { (val) = FIX_ONE; } \
	}
2431 
/*applies the color matrix in place to one pixel given as four separate 8-bit components.
Does nothing when the matrix is NULL or flagged as identity.*/
static void gf_cmx_apply_argb(GF_ColorMatrix *_this, u8 *a_, u8 *r_, u8 *g_, u8 *b_)
{
	const Fixed *m;
	Fixed in_a, in_r, in_g, in_b;
	Fixed out_a, out_r, out_g, out_b;

	if (!_this) return;
	if (_this->identity) return;
	m = _this->m;

	/*scale the 8-bit components by 1/255*/
	in_a = INT2FIX(*a_)/255;
	in_r = INT2FIX(*r_)/255;
	in_g = INT2FIX(*g_)/255;
	in_b = INT2FIX(*b_)/255;

	/*each output is a weighted sum of (r, g, b, a) plus the row offset (5th coefficient)*/
	out_r = gf_mulfix(in_r, m[0]) + gf_mulfix(in_g, m[1]) + gf_mulfix(in_b, m[2]) + gf_mulfix(in_a, m[3]) + m[4];
	out_g = gf_mulfix(in_r, m[5]) + gf_mulfix(in_g, m[6]) + gf_mulfix(in_b, m[7]) + gf_mulfix(in_a, m[8]) + m[9];
	out_b = gf_mulfix(in_r, m[10]) + gf_mulfix(in_g, m[11]) + gf_mulfix(in_b, m[12]) + gf_mulfix(in_a, m[13]) + m[14];
	out_a = gf_mulfix(in_r, m[15]) + gf_mulfix(in_g, m[16]) + gf_mulfix(in_b, m[17]) + gf_mulfix(in_a, m[18]) + m[19];

	/*clamp to [0, FIX_ONE] before converting back to 8 bits*/
	CLIP_COMP(out_a);
	CLIP_COMP(out_r);
	CLIP_COMP(out_g);
	CLIP_COMP(out_b);

	*a_ = FIX2INT(out_a*255);
	*r_ = FIX2INT(out_r*255);
	*g_ = FIX2INT(out_g*255);
	*b_ = FIX2INT(out_b*255);
}
2455 
2456 
2457 GF_EXPORT
gf_cmx_apply(GF_ColorMatrix * _this,GF_Color col)2458 GF_Color gf_cmx_apply(GF_ColorMatrix *_this, GF_Color col)
2459 {
2460 	Fixed _a, _r, _g, _b, a, r, g, b;
2461 	if (!_this || _this->identity) return col;
2462 
2463 	a = INT2FIX(col>>24);
2464 	a /= 255;
2465 	r = INT2FIX((col>>16)&0xFF);
2466 	r /= 255;
2467 	g = INT2FIX((col>>8)&0xFF);
2468 	g /= 255;
2469 	b = INT2FIX((col)&0xFF);
2470 	b /= 255;
2471 	_r = gf_mulfix(r, _this->m[0]) + gf_mulfix(g, _this->m[1]) + gf_mulfix(b, _this->m[2]) + gf_mulfix(a, _this->m[3]) + _this->m[4];
2472 	_g = gf_mulfix(r, _this->m[5]) + gf_mulfix(g, _this->m[6]) + gf_mulfix(b, _this->m[7]) + gf_mulfix(a, _this->m[8]) + _this->m[9];
2473 	_b = gf_mulfix(r, _this->m[10]) + gf_mulfix(g, _this->m[11]) + gf_mulfix(b, _this->m[12]) + gf_mulfix(a, _this->m[13]) + _this->m[14];
2474 	_a = gf_mulfix(r, _this->m[15]) + gf_mulfix(g, _this->m[16]) + gf_mulfix(b, _this->m[17]) + gf_mulfix(a, _this->m[18]) + _this->m[19];
2475 	CLIP_COMP(_a);
2476 	CLIP_COMP(_r);
2477 	CLIP_COMP(_g);
2478 	CLIP_COMP(_b);
2479 	return GF_COL_ARGB(FIX2INT(_a*255),FIX2INT(_r*255),FIX2INT(_g*255),FIX2INT(_b*255));
2480 }
2481 
2482 GF_EXPORT
gf_cmx_apply_wide(GF_ColorMatrix * _this,u64 col)2483 u64 gf_cmx_apply_wide(GF_ColorMatrix *_this, u64 col)
2484 {
2485 	u64 res;
2486 	Fixed _a, _r, _g, _b, a, r, g, b;
2487 	if (!_this || _this->identity) return col;
2488 
2489 	a = INT2FIX(col>>48);
2490 	a /= 0xFFFF;
2491 	r = INT2FIX((col>>32)&0xFFFF);
2492 	r /= 0xFFFF;
2493 	g = INT2FIX((col>>16)&0xFFFF);
2494 	g /= 0xFFFF;
2495 	b = INT2FIX((col)&0xFFFF);
2496 	b /= 0xFFFF;
2497 	_r = gf_mulfix(r, _this->m[0]) + gf_mulfix(g, _this->m[1]) + gf_mulfix(b, _this->m[2]) + gf_mulfix(a, _this->m[3]) + _this->m[4];
2498 	_g = gf_mulfix(r, _this->m[5]) + gf_mulfix(g, _this->m[6]) + gf_mulfix(b, _this->m[7]) + gf_mulfix(a, _this->m[8]) + _this->m[9];
2499 	_b = gf_mulfix(r, _this->m[10]) + gf_mulfix(g, _this->m[11]) + gf_mulfix(b, _this->m[12]) + gf_mulfix(a, _this->m[13]) + _this->m[14];
2500 	_a = gf_mulfix(r, _this->m[15]) + gf_mulfix(g, _this->m[16]) + gf_mulfix(b, _this->m[17]) + gf_mulfix(a, _this->m[18]) + _this->m[19];
2501 	CLIP_COMP(_a);
2502 	CLIP_COMP(_r);
2503 	CLIP_COMP(_g);
2504 	CLIP_COMP(_b);
2505 	res = (u32) (_a*0xFFFF)&0xFFFF;
2506 	res<<=16;
2507 	res |= (u32) (_r*0xFFFF)&0xFFFF;
2508 	res<<=16;
2509 	res |= (u32) (_g*0xFFFF)&0xFFFF;
2510 	res<<=16;
2511 	res |= (u32) (_b*0xFFFF)&0xFFFF;
2512 	return res;
2513 }
2514 
2515 GF_EXPORT
gf_cmx_apply_fixed(GF_ColorMatrix * _this,Fixed * a,Fixed * r,Fixed * g,Fixed * b)2516 void gf_cmx_apply_fixed(GF_ColorMatrix *_this, Fixed *a, Fixed *r, Fixed *g, Fixed *b)
2517 {
2518 	u32 col = GF_COL_ARGB_FIXED(*a, *r, *g, *b);
2519 	col = gf_cmx_apply(_this, col);
2520 	*a = INT2FIX(GF_COL_A(col)) / 255;
2521 	*r = INT2FIX(GF_COL_R(col)) / 255;
2522 	*g = INT2FIX(GF_COL_G(col)) / 255;
2523 	*b = INT2FIX(GF_COL_B(col)) / 255;
2524 }
2525 
2526 
2527 
//SSE2 intrinsic code segfaults on 32-bit builds (cause still to be investigated), so it is only enabled on 64-bit builds
2529 #if defined(GPAC_64_BITS)
2530 # if defined(WIN32) && !defined(__GNUC__)
2531 #  include <intrin.h>
2532 #  define GPAC_HAS_SSE2
2533 # else
2534 #  ifdef __SSE2__
2535 #   include <emmintrin.h>
2536 #   define GPAC_HAS_SSE2
2537 #  endif
2538 # endif
2539 #endif
2540 
2541 #ifdef GPAC_HAS_SSE2
2542 
color_write_yv12_10_to_yuv_intrin(GF_VideoSurface * vs_dst,unsigned char * pY,unsigned char * pU,unsigned char * pV,u32 src_stride,u32 src_width,u32 src_height,const GF_Window * _src_wnd,Bool swap_uv)2543 static GF_Err color_write_yv12_10_to_yuv_intrin(GF_VideoSurface *vs_dst, unsigned char *pY, unsigned char *pU, unsigned char*pV, u32 src_stride, u32 src_width, u32 src_height, const GF_Window *_src_wnd, Bool swap_uv)
2544 {
2545 	u32 i, j, w, h;
2546 	__m128i val1, val2, val_dst, *src1, *src2, *dst;
2547 	if (!pY) return GF_BAD_PARAM;
2548 
2549 	if (!pU) {
2550 		pU = pY + src_stride * src_height;
2551 		pV = pY + 5*src_stride * src_height/4;
2552 	}
2553 
2554 	if (_src_wnd) {
2555 		pY = pY + src_stride * _src_wnd->y + _src_wnd->x;
2556 		/*because of U and V downsampling by 2x2, working with odd Y offset will lead to a half-line shift between Y and UV components. We
2557 		therefore force an even Y offset for U and V planes.*/
2558 		pU = pU + (src_stride * (_src_wnd->y / 2) + _src_wnd->x) / 2;
2559 		pV = pV + (src_stride * (_src_wnd->y / 2) + _src_wnd->x) / 2;
2560 		w = _src_wnd->w;
2561 		h = _src_wnd->h;
2562 	} else {
2563 		w = src_width;
2564 		h = src_height;
2565 	}
2566 
2567 	if (swap_uv) {
2568 		u8 *t = pV;
2569 		pV = pU;
2570 		pU = t;
2571 	}
2572 
2573 
2574 
2575 		for (i=0; i<h; i++) {
2576 			src1 = (__m128i *)(pY + i*src_stride);
2577 			src2 = src1+1;
2578 			dst = (__m128i *)(vs_dst->video_buffer + i*vs_dst->pitch_y);
2579 
2580 			for (j=0; j<w/16; j++, src1+=2, src2+=2, dst++) {
2581 				val1 = _mm_load_si128(src1);
2582 				val1 = _mm_srli_epi16(val1, 2);
2583 				val2 = _mm_load_si128(src2);
2584 				val2 = _mm_srli_epi16(val2, 2);
2585 				val_dst = _mm_packus_epi16(val1, val2);
2586 				_mm_store_si128(dst, val_dst);
2587 			}
2588 		}
2589 
2590 		for (i=0; i<h/2; i++) {
2591 			src1 = (__m128i *) (pU + i*src_stride/2);
2592 			src2 = src1+1;
2593 			if (vs_dst->u_ptr) dst = (__m128i *) (vs_dst->u_ptr + i*vs_dst->pitch_y/2);
2594 			else dst = (__m128i *)(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y/2);
2595 
2596 			for (j=0; j<w/32; j++, src1+=2, src2+=2, dst++) {
2597 				val1 = _mm_load_si128(src1);
2598 				val1 = _mm_srli_epi16(val1, 2);
2599 				val2 = _mm_load_si128(src2);
2600 				val2 = _mm_srli_epi16(val2, 2);
2601 				val_dst = _mm_packus_epi16(val1, val2);
2602 				_mm_store_si128(dst, val_dst);
2603 			}
2604 		}
2605 
2606 		for (i=0; i<h/2; i++) {
2607 			src1 = (__m128i *) (pV + i*src_stride/2);
2608 			src2 = src1+1;
2609 			if (vs_dst->v_ptr) dst = (__m128i *) (vs_dst->v_ptr + i*vs_dst->pitch_y/2);
2610 			else dst = (__m128i *)(vs_dst->video_buffer + 5*vs_dst->pitch_y * vs_dst->height/4  + i*vs_dst->pitch_y/2);
2611 
2612 			for (j=0; j<w/32; j++, src1+=2, src2+=2, dst++) {
2613 				val1 = _mm_load_si128(src1);
2614 				val1 = _mm_srli_epi16(val1, 2);
2615 				val2 = _mm_load_si128(src2);
2616 				val2 = _mm_srli_epi16(val2, 2);
2617 				val_dst = _mm_packus_epi16(val1, val2);
2618 				_mm_store_si128(dst, val_dst);
2619 			}
2620 		}
2621 		return GF_OK;
2622 
2623 }
2624 
color_write_yuv422_10_to_yuv422_intrin(GF_VideoSurface * vs_dst,unsigned char * pY,unsigned char * pU,unsigned char * pV,u32 src_stride,u32 src_width,u32 src_height,const GF_Window * _src_wnd,Bool swap_uv)2625 static GF_Err color_write_yuv422_10_to_yuv422_intrin(GF_VideoSurface *vs_dst,  unsigned char *pY, unsigned char *pU, unsigned char*pV, u32 src_stride, u32 src_width, u32 src_height, const GF_Window *_src_wnd, Bool swap_uv)
2626 {
2627 	u32 i, j, w, h;
2628 	__m128i val1, val2, val_dst, *src1, *src2, *dst;
2629 	if (!pU) {
2630 		pU = pY + src_stride * src_height;
2631 		pV = pY + 3*src_stride * src_height/2;
2632 	}
2633 
2634 	if (_src_wnd) {
2635 		pY = pY + src_stride * _src_wnd->y + _src_wnd->x;
2636 		pU = pU + (src_stride * _src_wnd->y  + _src_wnd->x) / 2;
2637 		pV = pV + (src_stride * _src_wnd->y  + _src_wnd->x) / 2;
2638 		w = _src_wnd->w;
2639 		h = _src_wnd->h;
2640 	} else {
2641 		w = src_width;
2642 		h = src_height;
2643 	}
2644 
2645 	if (swap_uv) {
2646 		u8 *t = pV;
2647 		pV = pU;
2648 		pU = t;
2649 	}
2650 
2651 
2652 
2653 	for (i=0; i<h; i++) {
2654 		src1 = (__m128i *)(pY + i*src_stride);
2655 		src2 = src1+1;
2656 		dst = (__m128i *)(vs_dst->video_buffer + i*vs_dst->pitch_y);
2657 
2658 		for (j=0; j<w/16; j++, src1+=2, src2+=2, dst++) {
2659 			val1 = _mm_load_si128(src1);
2660 			val1 = _mm_srli_epi16(val1, 2);
2661 			val2 = _mm_load_si128(src2);
2662 			val2 = _mm_srli_epi16(val2, 2);
2663 			val_dst = _mm_packus_epi16(val1, val2);
2664 			_mm_store_si128(dst, val_dst);
2665 		}
2666 	}
2667 
2668 	for (i=0; i<h; i++) {
2669 		src1 = (__m128i *) (pU + i*src_stride/2);
2670 		src2 = src1+1;
2671 		if (vs_dst->u_ptr) dst = (__m128i *) (vs_dst->u_ptr + i*vs_dst->pitch_y/2);
2672 		else dst = (__m128i *)(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y/2);
2673 
2674 		for (j=0; j<w/32; j++, src1+=2, src2+=2, dst++) {
2675 			val1 = _mm_load_si128(src1);
2676 			val1 = _mm_srli_epi16(val1, 2);
2677 			val2 = _mm_load_si128(src2);
2678 			val2 = _mm_srli_epi16(val2, 2);
2679 			val_dst = _mm_packus_epi16(val1, val2);
2680 			_mm_store_si128(dst, val_dst);
2681 		}
2682 	}
2683 
2684 	for (i=0; i<h; i++) {
2685 		src1 = (__m128i *) (pV + i*src_stride/2);
2686 		src2 = src1+1;
2687 		if (vs_dst->v_ptr) dst = (__m128i *) (vs_dst->v_ptr + i*vs_dst->pitch_y/2);
2688 		else dst = (__m128i *)(vs_dst->video_buffer + 3*vs_dst->pitch_y * vs_dst->height/2  + i*vs_dst->pitch_y/2);
2689 
2690 		for (j=0; j<w/32; j++, src1+=2, src2+=2, dst++) {
2691 			val1 = _mm_load_si128(src1);
2692 			val1 = _mm_srli_epi16(val1, 2);
2693 			val2 = _mm_load_si128(src2);
2694 			val2 = _mm_srli_epi16(val2, 2);
2695 			val_dst = _mm_packus_epi16(val1, val2);
2696 			_mm_store_si128(dst, val_dst);
2697 		}
2698 	}
2699 	return GF_OK;
2700 
2701 }
2702 
color_write_yuv444_10_to_yuv444_intrin(GF_VideoSurface * vs_dst,unsigned char * pY,unsigned char * pU,unsigned char * pV,u32 src_stride,u32 src_width,u32 src_height,const GF_Window * _src_wnd,Bool swap_uv)2703 static GF_Err color_write_yuv444_10_to_yuv444_intrin(GF_VideoSurface *vs_dst, unsigned char *pY, unsigned char *pU, unsigned char*pV, u32 src_stride, u32 src_width, u32 src_height, const GF_Window *_src_wnd, Bool swap_uv)
2704 {
2705 	u32 i, j, w, h;
2706 	__m128i val1, val2, val_dst, *src1, *src2, *dst;
2707 	if (!pU) {
2708 		pU = pY + src_stride * src_height;
2709 		pV = pY + 2 * src_stride * src_height ;
2710 	}
2711 
2712 	if (_src_wnd) {
2713 		pY = pY + src_stride * _src_wnd->y  + _src_wnd->x;
2714 		pU = pU + src_stride * _src_wnd->y  + _src_wnd->x;
2715 		pV = pV + src_stride * _src_wnd->y  + _src_wnd->x;
2716 		w = _src_wnd->w;
2717 		h = _src_wnd->h;
2718 	}
2719 	else {
2720 		w = src_width;
2721 		h = src_height;
2722 	}
2723 
2724 	if (swap_uv) {
2725 		u8 *t = pV;
2726 		pV = pU;
2727 		pU = t;
2728 	}
2729 
2730 
2731 	for (i = 0; i<h; i++) {
2732 		src1 = (__m128i *)(pY + i*src_stride);
2733 		src2 = src1 + 1;
2734 		dst = (__m128i *)(vs_dst->video_buffer + i*vs_dst->pitch_y);
2735 
2736 		for (j = 0; j<w / 16; j++, src1 += 2, src2 += 2, dst++) {
2737 			val1 = _mm_load_si128(src1);
2738 			val1 = _mm_srli_epi16(val1, 2);
2739 			val2 = _mm_load_si128(src2);
2740 			val2 = _mm_srli_epi16(val2, 2);
2741 			val_dst = _mm_packus_epi16(val1, val2);
2742 			_mm_store_si128(dst, val_dst);
2743 		}
2744 	}
2745 
2746 	for (i = 0; i<h; i++) {
2747 		src1 = (__m128i *) (pU + i*src_stride );
2748 		src2 = src1 + 1;
2749 		if (vs_dst->u_ptr) dst = (__m128i *) (vs_dst->u_ptr + i*vs_dst->pitch_y );
2750 		else dst = (__m128i *)(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y );
2751 
2752 		for (j = 0; j<w / 16; j++, src1 += 2, src2 += 2, dst++) {
2753 			val1 = _mm_load_si128(src1);
2754 			val1 = _mm_srli_epi16(val1, 2);
2755 			val2 = _mm_load_si128(src2);
2756 			val2 = _mm_srli_epi16(val2, 2);
2757 			val_dst = _mm_packus_epi16(val1, val2);
2758 			_mm_store_si128(dst, val_dst);
2759 		}
2760 	}
2761 
2762 	for (i = 0; i<h ; i++) {
2763 		src1 = (__m128i *) (pV + i*src_stride );
2764 		src2 = src1 + 1;
2765 		if (vs_dst->v_ptr) dst = (__m128i *) (vs_dst->v_ptr + i*vs_dst->pitch_y);
2766 		else dst = (__m128i *)(vs_dst->video_buffer + 2 * vs_dst->pitch_y * vs_dst->height  + i*vs_dst->pitch_y );
2767 
2768 		for (j = 0; j<w / 16; j++, src1 += 2, src2 += 2, dst++) {
2769 			val1 = _mm_load_si128(src1);
2770 			val1 = _mm_srli_epi16(val1, 2);
2771 			val2 = _mm_load_si128(src2);
2772 			val2 = _mm_srli_epi16(val2, 2);
2773 			val_dst = _mm_packus_epi16(val1, val2);
2774 			_mm_store_si128(dst, val_dst);
2775 		}
2776 	}
2777 	return GF_OK;
2778 
2779 }
color_write_yuv422_10_to_yuv_intrin(GF_VideoSurface * vs_dst,unsigned char * pY,unsigned char * pU,unsigned char * pV,u32 src_stride,u32 src_width,u32 src_height,const GF_Window * _src_wnd,Bool swap_uv)2780 static GF_Err color_write_yuv422_10_to_yuv_intrin(GF_VideoSurface *vs_dst, unsigned char *pY, unsigned char *pU, unsigned char*pV, u32 src_stride, u32 src_width, u32 src_height, const GF_Window *_src_wnd, Bool swap_uv)
2781 {
2782 	u32 i, j, w, h;
2783 	__m128i val1, val2, val_dst, *src1, *src2, *dst;
2784 	if (!pU) {
2785 		pU = pY + src_stride * src_height;
2786 		pV = pY + 3 * src_stride * src_height / 2;
2787 	}
2788 
2789 	if (_src_wnd) {
2790 		pY = pY + src_stride * _src_wnd->y + _src_wnd->x;
2791 		pU = pU + (src_stride * _src_wnd->y + _src_wnd->x) / 2;
2792 		pV = pV + (src_stride * _src_wnd->y + _src_wnd->x) / 2;
2793 		w = _src_wnd->w;
2794 		h = _src_wnd->h;
2795 	}
2796 	else {
2797 		w = src_width;
2798 		h = src_height;
2799 	}
2800 
2801 	if (swap_uv) {
2802 		u8 *t = pV;
2803 		pV = pU;
2804 		pU = t;
2805 	}
2806 
2807 
2808 
2809 	for (i = 0; i<h; i++) {
2810 		src1 = (__m128i *)(pY + i*src_stride);
2811 		src2 = src1 + 1;
2812 		dst = (__m128i *)(vs_dst->video_buffer + i*vs_dst->pitch_y);
2813 
2814 		for (j = 0; j<w / 16; j++, src1 += 2, src2 += 2, dst++) {
2815 			val1 = _mm_load_si128(src1);
2816 			val1 = _mm_srli_epi16(val1, 2);
2817 			val2 = _mm_load_si128(src2);
2818 			val2 = _mm_srli_epi16(val2, 2);
2819 			val_dst = _mm_packus_epi16(val1, val2);
2820 			_mm_store_si128(dst, val_dst);
2821 		}
2822 	}
2823 
2824 	for (i = 0; i<h / 2; i++) {
2825 		src1 = (__m128i *) (pU +  i*src_stride);
2826 		src2 = src1 + 1;
2827 		if (vs_dst->u_ptr) dst = (__m128i *) (vs_dst->u_ptr + i*vs_dst->pitch_y / 2);
2828 		else dst = (__m128i *)(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y / 2);
2829 
2830 		for (j = 0; j<w / 32; j++, src1 += 2, src2 += 2, dst++) {
2831 			val1 = _mm_load_si128(src1);
2832 			val1 = _mm_srli_epi16(val1, 2);
2833 			val2 = _mm_load_si128(src2);
2834 			val2 = _mm_srli_epi16(val2, 2);
2835 			val_dst = _mm_packus_epi16(val1, val2);
2836 			_mm_store_si128(dst, val_dst);
2837 		}
2838 	}
2839 
2840 	for (i = 0; i<h / 2; i++) {
2841 		src1 = (__m128i *) (pV + i*src_stride);
2842 		src2 = src1 + 1;
2843 		if (vs_dst->v_ptr) dst = (__m128i *) (vs_dst->v_ptr + i*vs_dst->pitch_y / 2);
2844 		else dst = (__m128i *)(vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4 + i*vs_dst->pitch_y / 2);
2845 
2846 		for (j = 0; j<w / 32; j++, src1 += 2, src2 += 2, dst++) {
2847 			val1 = _mm_load_si128(src1);
2848 			val1 = _mm_srli_epi16(val1, 2);
2849 			val2 = _mm_load_si128(src2);
2850 			val2 = _mm_srli_epi16(val2, 2);
2851 			val_dst = _mm_packus_epi16(val1, val2);
2852 			_mm_store_si128(dst, val_dst);
2853 		}
2854 	}
2855 	return GF_OK;
2856 
2857 }
color_write_yuv444_10_to_yuv_intrin(GF_VideoSurface * vs_dst,unsigned char * pY,unsigned char * pU,unsigned char * pV,u32 src_stride,u32 src_width,u32 src_height,const GF_Window * _src_wnd,Bool swap_uv)2858 static GF_Err color_write_yuv444_10_to_yuv_intrin(GF_VideoSurface *vs_dst, unsigned char *pY, unsigned char *pU, unsigned char*pV, u32 src_stride, u32 src_width, u32 src_height, const GF_Window *_src_wnd, Bool swap_uv)
2859 {
2860 	u32 i, j, w, h;
2861 	__m128i val1, val2,val3,val4, val12, val34, val_dst, *src1, *src2,*src3,*src4, *dst;
2862 
2863 	if (!pU) {
2864 		pU = pY + src_stride * src_height;
2865 		pV = pY + 2 * src_stride * src_height;
2866 	}
2867 
2868 	if (_src_wnd) {
2869 		pY = pY + src_stride * _src_wnd->y + _src_wnd->x;
2870 		pU = pU + src_stride * _src_wnd->y + _src_wnd->x;
2871 		pV = pV + src_stride * _src_wnd->y + _src_wnd->x;
2872 		w = _src_wnd->w;
2873 		h = _src_wnd->h;
2874 	}
2875 	else {
2876 		w = src_width;
2877 		h = src_height;
2878 	}
2879 
2880 	if (swap_uv) {
2881 		u8 *t = pV;
2882 		pV = pU;
2883 		pU = t;
2884 	}
2885 
2886 
2887 	for (i = 0; i<h; i++) {
2888 		src1 = (__m128i *)(pY + i*src_stride);
2889 		src2 = src1 + 1;
2890 		dst = (__m128i *)(vs_dst->video_buffer + i*vs_dst->pitch_y);
2891 
2892 		for (j = 0; j<w / 16; j++, src1 += 2, src2 += 2, dst++) {
2893 			val1 = _mm_load_si128(src1);
2894 			val1 = _mm_srli_epi16(val1, 2);
2895 			val2 = _mm_load_si128(src2);
2896 			val2 = _mm_srli_epi16(val2, 2);
2897 			val_dst = _mm_packus_epi16(val1, val2);
2898 			_mm_store_si128(dst, val_dst);
2899 		}
2900 	}
2901 
2902 	for (i = 0; i<h / 2; i++) {
2903 		src1 = (__m128i *) (pU + 2*i*src_stride);
2904 		src2 = src1 + 1;
2905 		src3 = src2 + 1;
2906 		src4 = src3 + 1;
2907 		if (vs_dst->u_ptr) dst = (__m128i *) (vs_dst->u_ptr + i*vs_dst->pitch_y / 2);
2908 		else dst = (__m128i *)(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y / 2);
2909 
2910 		for (j = 0; j<w /32; j++, src1 += 4, src2 += 4,src3 +=4, src4+=4, dst++) {
2911 			val1 = _mm_load_si128(src1);
2912 			val1 = _mm_srli_epi32(val1, 16);
2913 			val2 = _mm_load_si128(src2);
2914 			val2 = _mm_srli_epi32(val2, 16);
2915 			val12 = _mm_packs_epi32(val1, val2);
2916 			val12 = _mm_srli_epi16(val12, 2);
2917 
2918 			val3 = _mm_load_si128(src3);
2919 
2920 			val3 = _mm_srli_epi32(val3, 16);
2921 			val4 = _mm_load_si128(src4);
2922 
2923 			val4 = _mm_srli_epi32(val4, 16);
2924 			val34 = _mm_packs_epi32(val3, val4);
2925 			val34 = _mm_srli_epi16(val34, 2);
2926 
2927 			val_dst = _mm_packus_epi16(val12, val34);
2928 			_mm_store_si128(dst, val_dst);
2929 
2930 		}
2931 	}
2932 
2933 	for (i = 0; i<h / 2; i++) {
2934 		src1 = (__m128i *) (pV + 2*i*src_stride );
2935 		src2 = src1 + 1;
2936 		src3 = src1 + 2;
2937 		src4 = src1 + 3;
2938 		if (vs_dst->v_ptr) dst = (__m128i *) (vs_dst->v_ptr + i*vs_dst->pitch_y / 2);
2939 		else dst = (__m128i *)(vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4 + i*vs_dst->pitch_y / 2);
2940 
2941 		for (j = 0; j<w / 32; j++, src1 += 4, src2 += 4, src3 += 4, src4 += 4, dst++) {
2942 			val1 = _mm_load_si128(src1);
2943 
2944 			val1 = _mm_srli_epi32(val1, 16);
2945 			val2 = _mm_load_si128(src2);
2946 
2947 			val2 = _mm_srli_epi32(val2, 16);
2948 			val12 = _mm_packs_epi32(val1, val2);
2949 			val12 = _mm_srli_epi16(val12, 2);
2950 
2951 			val3 = _mm_load_si128(src3);
2952 
2953 			val3 = _mm_srli_epi32(val3, 16);
2954 			val4 = _mm_load_si128(src4);
2955 
2956 			val4 = _mm_srli_epi32(val4, 16);
2957 			val34 = _mm_packs_epi32(val3, val4);
2958 			val34 = _mm_srli_epi16(val34, 2);
2959 
2960 			val_dst = _mm_packus_epi16(val12, val34);
2961 			_mm_store_si128(dst, val_dst);
2962 		}
2963 	}
2964 
2965 	return GF_OK;
2966 
2967 }
2968 #endif
2969 
2970 
color_write_yv12_10_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,const GF_Window * _src_wnd,Bool swap_uv)2971 static GF_Err color_write_yv12_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, const GF_Window *_src_wnd, Bool swap_uv)
2972 {
2973 	u32 i, j, w, h;
2974 	u8 *pY = vs_src->video_buffer;
2975 	u8 *pU = vs_src->u_ptr;
2976 	u8 *pV = vs_src->v_ptr;
2977 
2978 	if (_src_wnd) {
2979 		w = _src_wnd->w;
2980 		h = _src_wnd->h;
2981 	} else {
2982 		w = vs_src->width;
2983 		h = vs_src->height;
2984 	}
2985 
2986 	if (!pU) {
2987 		pU = pY + vs_src->pitch_y * vs_src->height;
2988 		pV = pY + 5 * vs_src->pitch_y * vs_src->height/4;
2989 	}
2990 
2991 #ifdef GPAC_HAS_SSE2
2992 
2993 #ifdef GPAC_64_BITS
2994 #define GFINTCAST  (u64)
2995 #else
2996 #define GFINTCAST  (u32)
2997 #endif
2998 
2999 	if ( (w%32 == 0)
3000 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y)%8 == 0)
3001 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + vs_dst->pitch_y/2)%8 == 0)
3002 	        && (GFINTCAST (pU + vs_src->pitch_y/2)%8 == 0)
3003 	        && (GFINTCAST (pV + vs_src->pitch_y/2)%8 == 0)
3004 	   ) {
3005 		return color_write_yv12_10_to_yuv_intrin(vs_dst, pY, pU, pV, vs_src->pitch_y, vs_src->width, vs_src->height, _src_wnd, swap_uv);
3006 	}
3007 #endif
3008 
3009 	if (_src_wnd) {
3010 		pY = pY + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3011 		/*because of U and V downsampling by 2x2, working with odd Y offset will lead to a half-line shift between Y and UV components. We
3012 		therefore force an even Y offset for U and V planes.*/
3013 		pU = pU + (vs_src->pitch_y * (_src_wnd->y / 2) + _src_wnd->x) / 2;
3014 		pV = pV + (vs_src->pitch_y * (_src_wnd->y / 2) + _src_wnd->x) / 2;
3015 	}
3016 
3017 	if (swap_uv) {
3018 		u8 *t = pV;
3019 		pV = pU;
3020 		pU = t;
3021 	}
3022 
3023 
3024 	for (i=0; i<h; i++) {
3025 		u16 *src = (u16 *) (pY + i*vs_src->pitch_y);
3026 		u8 *dst = (u8 *) vs_dst->video_buffer + i*vs_dst->pitch_y;
3027 
3028 		for (j=0; j<w; j++) {
3029 			*dst = (*src) >> 2;
3030 			dst++;
3031 			src++;
3032 		}
3033 	}
3034 
3035 	for (i=0; i<h/2; i++) {
3036 		u16 *src = (u16 *) (pU + i*vs_src->pitch_y/2);
3037 		u8 *dst = (u8 *) vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y/2;
3038 		if (vs_dst->u_ptr) dst = (u8 *) (vs_dst->u_ptr + i*vs_dst->pitch_y/2);
3039 
3040 		for (j=0; j<w/2; j++) {
3041 			*dst = (*src) >> 2;
3042 			dst++;
3043 			src++;
3044 		}
3045 	}
3046 
3047 	for (i=0; i<h/2; i++) {
3048 		u16 *src = (u16 *) (pV + i*vs_src->pitch_y/2);
3049 		u8 *dst = (u8 *) vs_dst->video_buffer + 5*vs_dst->pitch_y * vs_dst->height/4  + i*vs_dst->pitch_y/2;
3050 		if (vs_dst->v_ptr) dst = (u8 *) (vs_dst->v_ptr + i*vs_dst->pitch_y/2);
3051 
3052 		for (j=0; j<w/2; j++) {
3053 			*dst = (*src) >> 2;
3054 			dst++;
3055 			src++;
3056 		}
3057 	}
3058 	return GF_OK;
3059 }
3060 
color_write_nv12_10_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3061 static GF_Err color_write_nv12_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3062 {
3063 	u32 i, j, w, h;
3064 	u8 *pY = vs_src->video_buffer;
3065 	u8 *pUV = vs_src->u_ptr;
3066 
3067 	if (_src_wnd) {
3068 		w = _src_wnd->w;
3069 		h = _src_wnd->h;
3070 	} else {
3071 		w = vs_src->width;
3072 		h = vs_src->height;
3073 	}
3074 
3075 
3076 //#ifdef GPAC_HAS_SSE2
3077 #if 0
3078 
3079 #ifdef GPAC_64_BITS
3080 #define GFINTCAST  (u64)
3081 #else
3082 #define GFINTCAST  (u32)
3083 #endif
3084 
3085 	if ( (w%32 == 0)
3086 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y)%8 == 0)
3087 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + vs_dst->pitch_y/2)%8 == 0)
3088 	        && (GFINTCAST (pU + src_stride/2)%8 == 0)
3089 	        && (GFINTCAST (pV + src_stride/2)%8 == 0)
3090 	   ) {
3091 		return gf_color_write_yv12_10_to_yuv_intrin(vs_dst, pY, pU, pV, src_stride, src_width, src_height, _src_wnd, swap_uv);
3092 	}
3093 #endif
3094 
3095 	if (!pUV) {
3096 		pUV = pY + vs_src->pitch_y * vs_src->height;
3097 	}
3098 
3099 	if (_src_wnd) {
3100 		pY = pY + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3101 		/*because of U and V downsampling by 2x2, working with odd Y offset will lead to a half-line shift between Y and UV components. We
3102 		therefore force an even Y offset for U and V planes.*/
3103 		pUV = pUV + (vs_src->pitch_y * (_src_wnd->y / 2) + _src_wnd->x) / 2;
3104 	}
3105 
3106 	for (i=0; i<h; i++) {
3107 		u16 *src = (u16 *) (pY + i*vs_src->pitch_y);
3108 		u8 *dst = (u8 *) vs_dst->video_buffer + i*vs_dst->pitch_y;
3109 
3110 		for (j=0; j<w; j++) {
3111 			*dst = (*src) >> 2;
3112 			dst++;
3113 			src++;
3114 		}
3115 	}
3116 
3117 	for (i=0; i<h/2; i++) {
3118 		u16 *src = (u16 *) (pUV + i*vs_src->pitch_y/2);
3119 		u8 *dst = (u8 *) vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y/2;
3120 		if (vs_dst->u_ptr) dst = (u8 *) (vs_dst->u_ptr + i*vs_dst->pitch_y/2);
3121 		if (swap_uv) src += 1;
3122 
3123 		for (j=0; j<w/2; j++) {
3124 			*dst = (*src) >> 2;
3125 			dst++;
3126 			src++;
3127 		}
3128 	}
3129 
3130 	for (i=0; i<h/2; i++) {
3131 		u16 *src = (u16 *) (pUV + i*vs_src->pitch_y/2);
3132 		u8 *dst = (u8 *) vs_dst->video_buffer + 5*vs_dst->pitch_y * vs_dst->height/4  + i*vs_dst->pitch_y/2;
3133 		if (vs_dst->v_ptr) dst = (u8 *) (vs_dst->v_ptr + i*vs_dst->pitch_y/2);
3134 		if (!swap_uv) src += 1;
3135 
3136 		for (j=0; j<w/2; j++) {
3137 			*dst = (*src) >> 2;
3138 			dst++;
3139 			src++;
3140 		}
3141 	}
3142 	return GF_OK;
3143 
3144 }
3145 
color_write_yuv422_10_to_yuv422(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3146 static GF_Err color_write_yuv422_10_to_yuv422(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3147 {
3148 	u32 i, j, w, h;
3149 	u8 *pY = vs_src->video_buffer;
3150 	u8 *pU = vs_src->u_ptr;
3151 	u8 *pV = vs_src->v_ptr;
3152 
3153 	if (_src_wnd) {
3154 		w = _src_wnd->w;
3155 		h = _src_wnd->h;
3156 	} else {
3157 		w = vs_src->width;
3158 		h = vs_src->height;
3159 	}
3160 
3161 
3162 #ifdef GPAC_HAS_SSE2
3163 
3164 #ifdef GPAC_64_BITS
3165 #define GFINTCAST  (u64)
3166 #else
3167 #define GFINTCAST  (u32)
3168 #endif
3169 
3170 	if ( (w%32 == 0)
3171 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y)%8 == 0)
3172 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + vs_dst->pitch_y/2)%8 == 0)
3173 	        && (GFINTCAST (pU + vs_src->pitch_y/2)%8 == 0)
3174 	        && (GFINTCAST (pV + vs_src->pitch_y/2)%8 == 0)
3175 	   ) {
3176 		return color_write_yuv422_10_to_yuv422_intrin(vs_dst, pY, pU, pV, vs_src->pitch_y, vs_src->width, vs_src->height, _src_wnd, swap_uv);
3177 	}
3178 #endif
3179 
3180 	if (!pU) {
3181 		pU = pY + vs_src->pitch_y * vs_src->height;
3182 		pV = pY + 3*vs_src->pitch_y * vs_src->height/2;
3183 	}
3184 
3185 	if (_src_wnd) {
3186 		pY = pY + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3187 		pU = pU + (vs_src->pitch_y * _src_wnd->y + _src_wnd->x) / 2;
3188 		pV = pV + (vs_src->pitch_y * _src_wnd->y + _src_wnd->x) / 2;
3189 	}
3190 
3191 	if (swap_uv) {
3192 		u8 *t = pV;
3193 		pV = pU;
3194 		pU = t;
3195 	}
3196 
3197 	for (i=0; i<h; i++) {
3198 		u16 *src_y = (u16 *) (pY + i*vs_src->pitch_y);
3199 		u8 *dst_y = (u8 *) vs_dst->video_buffer + i*vs_dst->pitch_y;
3200 
3201 		for (j=0; j<w; j++) {
3202 			*dst_y = (*src_y) >> 2;
3203 			dst_y++;
3204 			src_y++;
3205 		}
3206 	}
3207 	for (i=0; i<h; i++) {
3208 		u16 *src_u = (u16 *) (pU + i*vs_src->pitch_y/2);
3209 		u16 *src_v = (u16 *) (pV + i*vs_src->pitch_y/2);
3210 		u8 *dst_u = (u8 *) vs_dst->video_buffer + vs_dst->width * vs_dst->height + i*vs_dst->pitch_y/2;
3211 		u8 *dst_v = (u8 *) vs_dst->video_buffer + 3*vs_dst->pitch_y * vs_dst->height/2  + i*vs_dst->pitch_y/2;
3212 		if (vs_dst->u_ptr) dst_u = (u8 *) (vs_dst->u_ptr + i*vs_dst->pitch_y/2);
3213 		if (vs_dst->v_ptr) dst_v = (u8 *) (vs_dst->v_ptr + i*vs_dst->pitch_y/2);
3214 
3215 		for (j=0; j<w/2; j++) {
3216 			*dst_u = (*src_u) >> 2;
3217 			dst_u++;
3218 			src_u++;
3219 
3220 			*dst_v = (*src_v) >> 2;
3221 			dst_v++;
3222 			src_v++;
3223 		}
3224 	}
3225 	return GF_OK;
3226 }
3227 
color_write_yuv444_10_to_yuv444(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3228 static GF_Err color_write_yuv444_10_to_yuv444(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3229 {
3230 	u32 i, j, w, h;
3231 	u8 *pY = vs_src->video_buffer;
3232 	u8 *pU = vs_src->u_ptr;
3233 	u8 *pV = vs_src->v_ptr;
3234 
3235 	if (_src_wnd) {
3236 		w = _src_wnd->w;
3237 		h = _src_wnd->h;
3238 	} else {
3239 		w = vs_src->width;
3240 		h = vs_src->height;
3241 	}
3242 
3243 
3244 #ifdef GPAC_HAS_SSE2
3245 
3246 #ifdef GPAC_64_BITS
3247 #define GFINTCAST  (u64)
3248 #else
3249 #define GFINTCAST  (u32)
3250 #endif
3251 
3252 	if ( (w%32 == 0)
3253 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y)%8 == 0)
3254 	        && (GFINTCAST (vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + vs_dst->pitch_y)%8 == 0)
3255 	        && (GFINTCAST (pU + vs_src->pitch_y)%8 == 0)
3256 	        && (GFINTCAST (pV + vs_src->pitch_y)%8 == 0)
3257 	   ) {
3258 		return color_write_yuv444_10_to_yuv444_intrin(vs_dst, pY, pU, pV, vs_src->pitch_y, vs_src->width, vs_src->height, _src_wnd, swap_uv);
3259 	}
3260 #endif
3261 
3262 	if (!pU) {
3263 		pU = pY + vs_src->pitch_y * vs_src->height;
3264 		pV = pY + 2*vs_src->pitch_y * vs_src->height;
3265 	}
3266 
3267 	if (_src_wnd) {
3268 		pY = pY + vs_src->pitch_y * _src_wnd->y  + _src_wnd->x;
3269 		pU = pU + vs_src->pitch_y * _src_wnd->y  + _src_wnd->x ;
3270 		pV = pV + vs_src->pitch_y * _src_wnd->y  + _src_wnd->x;
3271 	}
3272 
3273 	if (swap_uv) {
3274 		u8 *t = pV;
3275 		pV = pU;
3276 		pU = t;
3277 	}
3278 
3279 	for (i=0; i<h; i++) {
3280 		u16 *src_y = (u16 *) (pY + i*vs_src->pitch_y);
3281 		u8 *dst_y = (u8 *) vs_dst->video_buffer + i*vs_dst->pitch_y;
3282 
3283 		u16 *src_u= (u16 *) (pU + i*vs_src->pitch_y);
3284 		u8 *dst_u = (u8 *) vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height+ i*vs_dst->pitch_y;
3285 
3286 		u16 *src_v = (u16 *) (pV + i*vs_src->pitch_y);
3287 		u8 *dst_v = (u8 *) vs_dst->video_buffer + 2*vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y;
3288 
3289 		if (vs_dst->u_ptr) dst_u = (u8 *)(vs_dst->u_ptr + i*vs_dst->pitch_y);
3290 		if (vs_dst->v_ptr) dst_v = (u8 *)(vs_dst->v_ptr + i*vs_dst->pitch_y);
3291 
3292 		for (j=0; j<w; j++) {
3293 			*dst_y = (*src_y) >> 2;
3294 			dst_y++;
3295 			src_y++;
3296 
3297 			*dst_u = (*src_u) >> 2;
3298 			dst_u++;
3299 			src_u++;
3300 
3301 		   *dst_v= (*src_v) >> 2;
3302 			dst_v++;
3303 			src_v++;
3304 		}
3305 	}
3306 	return GF_OK;
3307 }
3308 
color_write_yuv422_10_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3309 static GF_Err color_write_yuv422_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3310 {
3311 	u32 i, j, w, h;
3312 	u8 *pY = vs_src->video_buffer;
3313 	u8 *pU = vs_src->u_ptr;
3314 	u8 *pV = vs_src->v_ptr;
3315 
3316 	if (_src_wnd) {
3317 		w = _src_wnd->w;
3318 		h = _src_wnd->h;
3319 	} else {
3320 		w = vs_src->width;
3321 		h = vs_src->height;
3322 	}
3323 
3324 
3325 #ifdef GPAC_HAS_SSE2
3326 
3327 #ifdef GPAC_64_BITS
3328 #define GFINTCAST  (u64)
3329 #else
3330 #define GFINTCAST  (u32)
3331 #endif
3332 
3333 	if ((w % 32 == 0)
3334 		&& (GFINTCAST(vs_dst->video_buffer + vs_dst->pitch_y) % 8 == 0)
3335 		&& (GFINTCAST(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + vs_dst->pitch_y / 2) % 8 == 0)
3336 		&& (GFINTCAST(pU + vs_src->pitch_y / 2) % 8 == 0)
3337 		&& (GFINTCAST(pV + vs_src->pitch_y / 2) % 8 == 0)
3338 		) {
3339 		return color_write_yuv422_10_to_yuv_intrin(vs_dst, pY, pU, pV, vs_src->pitch_y, vs_src->width, vs_src->height, _src_wnd, swap_uv);
3340 	}
3341 #endif
3342 
3343 	if (!pU) {
3344 		pU = pY + vs_src->pitch_y * vs_src->height;
3345 		pV = pY + 3 * vs_src->pitch_y * vs_src->height/2;
3346 	}
3347 
3348 	if (_src_wnd) {
3349 		pY = pY + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3350 		pU = pU + (vs_src->pitch_y * _src_wnd->y + _src_wnd->x) / 2;
3351 		pV = pV + (vs_src->pitch_y * _src_wnd->y + _src_wnd->x) / 2;
3352 	}
3353 
3354 	if (swap_uv) {
3355 		u8 *t = pV;
3356 		pV = pU;
3357 		pU = t;
3358 	}
3359 
3360 
3361 	for (i = 0; i<h; i++) {
3362 		u16 *src = (u16 *)(pY + i*vs_src->pitch_y);
3363 		u8 *dst = (u8 *)vs_dst->video_buffer + i*vs_dst->pitch_y;
3364 
3365 		for (j = 0; j<w; j++) {
3366 			*dst = (*src) >> 2;
3367 			dst++;
3368 			src++;
3369 		}
3370 	}
3371 
3372 	for (i = 0; i<h/2; i++) {
3373 		u16 *srcu = (u16 *)(pU +  i*vs_src->pitch_y);
3374 		u16 *srcv = (u16 *)(pV +  i*vs_src->pitch_y);
3375 		u8 *dstu, *dstv;
3376 
3377 		if (vs_dst->u_ptr)
3378 			dstu = (u8 *)(vs_dst->u_ptr + i*vs_dst->pitch_y / 2);
3379 		else
3380 			dstu = (u8 *)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y / 2;
3381 
3382 		if (vs_dst->v_ptr)
3383 			dstv = (u8 *)(vs_dst->v_ptr + i*vs_dst->pitch_y / 2);
3384 		else
3385 			dstv = (u8 *)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4 + i*vs_dst->pitch_y / 2;
3386 
3387 		for (j = 0; j<w / 2; j++) {
3388 			*dstu = ( (srcu[0] + srcu[1]) / 2) >> 2;
3389 			dstu++;
3390 			srcu+=2;
3391 
3392 			*dstv = ( (srcv[0] + srcv[1]) / 2) >> 2;
3393 			dstv++;
3394 			srcv+=2;
3395 		}
3396 	}
3397 
3398 	return GF_OK;
3399 }
3400 
color_write_yuv444_10_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3401 static GF_Err color_write_yuv444_10_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3402 {
3403 	u32 i, j, w, h;
3404 	u8 *pY = vs_src->video_buffer;
3405 	u8 *pU = vs_src->u_ptr;
3406 	u8 *pV = vs_src->v_ptr;
3407 
3408 	if (_src_wnd) {
3409 		w = _src_wnd->w;
3410 		h = _src_wnd->h;
3411 	} else {
3412 		w = vs_src->width;
3413 		h = vs_src->height;
3414 	}
3415 
3416 
3417 #ifdef GPAC_HAS_SSE2
3418 
3419 #ifdef GPAC_64_BITS
3420 #define GFINTCAST  (u64)
3421 #else
3422 #define GFINTCAST  (u32)
3423 #endif
3424 
3425 	if ( (w % 32 == 0)
3426 		&& (GFINTCAST(vs_dst->video_buffer + vs_dst->pitch_y) % 8 == 0)
3427 		&& (GFINTCAST(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + vs_dst->pitch_y) % 8 == 0)
3428 		&& (GFINTCAST(pU + vs_src->pitch_y) % 8 == 0)
3429 		&& (GFINTCAST(pV + vs_src->pitch_y) % 8 == 0)
3430 		) {
3431 		return color_write_yuv444_10_to_yuv_intrin(vs_dst, pY, pU, pV, vs_src->pitch_y, vs_src->width, vs_src->height, _src_wnd, swap_uv);
3432 	}
3433 #endif
3434 
3435 
3436 	if (!pU) {
3437 		pU = pY + vs_src->pitch_y * vs_src->height;
3438 		pV = pY + 2 * vs_src->pitch_y * vs_src->height;
3439 	}
3440 
3441 	if (_src_wnd) {
3442 		pY = pY + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3443 		pU = pU + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3444 		pV = pV + vs_src->pitch_y * _src_wnd->y + _src_wnd->x;
3445 	}
3446 
3447 	if (swap_uv) {
3448 		u8 *t = pV;
3449 		pV = pU;
3450 		pU = t;
3451 	}
3452 
3453 	for (i = 0; i<h; i++) {
3454 		u16 *src = (u16 *)(pY + i*vs_src->pitch_y);
3455 		u8 *dst = (u8 *)vs_dst->video_buffer + i*vs_dst->pitch_y;
3456 
3457 		for (j = 0; j<w; j++) {
3458 			*dst = (*src) >> 2;
3459 			dst++;
3460 			src++;
3461 		}
3462 	}
3463 
3464 	for (i = 0; i<h/2; i++) {
3465 		u16 *srcu1 = (u16 *)(pU + 2*i*vs_src->pitch_y );
3466 		u16 *srcu2 = (u16 *)(pU + 2*(i+1)*vs_src->pitch_y );
3467 		u16 *srcv1 = (u16 *)(pV + 2*i*vs_src->pitch_y );
3468 		u16 *srcv2 = (u16 *)(pV + 2*(i+1)*vs_src->pitch_y );
3469 		u8 *dstu, *dstv;
3470 
3471 		if (vs_dst->u_ptr)
3472 			dstu = (u8 *)(vs_dst->u_ptr + i*vs_dst->pitch_y / 2);
3473 		else
3474 			dstu = (u8 *)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i*vs_dst->pitch_y / 2;
3475 
3476 		if (vs_dst->v_ptr)
3477 			dstv = (u8 *)(vs_dst->v_ptr + i*vs_dst->pitch_y / 2);
3478 		else
3479 			dstv = (u8 *)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4 + i*vs_dst->pitch_y / 2;
3480 
3481 		for (j = 0; j<w/2 ;j++) {
3482 			u32 u, v;
3483 			u = (srcu1[0] + srcu1[1] + srcu2[0] + srcu2[1] ) / 4;
3484 			*dstu = u>>2;
3485 			dstu++;
3486 			srcu1+=2;
3487 			srcu2+=2;
3488 
3489 			v = (srcv1[0] + srcv1[1] + srcv2[0] + srcv2[1] ) / 4;
3490 			*dstv = v>>2;
3491 			dstv++;
3492 			srcv1+=2;
3493 			srcv2+=2;
3494 		}
3495 	}
3496 	return GF_OK;
3497 }
3498 
is_planar_yuv(u32 pf)3499 static Bool is_planar_yuv(u32 pf)
3500 {
3501 	switch (pf) {
3502 	case GF_PIXEL_YUV:
3503 	case GF_PIXEL_YUV_10:
3504 	case GF_PIXEL_YUV422:
3505 	case GF_PIXEL_YUV422_10:
3506 	case GF_PIXEL_YUV444:
3507 	case GF_PIXEL_YUV444_10:
3508 		return GF_TRUE;
3509 	}
3510 	return GF_FALSE;
3511 }
3512 
3513 
color_write_yuv420_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3514 static GF_Err color_write_yuv420_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3515 {
3516 	u32 w, h, ox, oy;
3517 	u8 *pY = vs_src->video_buffer;
3518 	u8 *pU = vs_src->u_ptr;
3519 	u8 *pV = vs_src->v_ptr;
3520 
3521 	if (_src_wnd) {
3522 		w = _src_wnd->w;
3523 		h = _src_wnd->h;
3524 		ox = _src_wnd->x;
3525 		oy = _src_wnd->y;
3526 	} else {
3527 		w = vs_src->width;
3528 		h = vs_src->height;
3529 		ox = oy = 0;
3530 	}
3531 
3532 	if (!pU) {
3533 		pU = pY + vs_src->pitch_y * vs_src->height;
3534 		pV = pY + 5 * vs_src->pitch_y * vs_src->height / 4;
3535 	}
3536 
3537 
3538 	pY = pY + vs_src->pitch_y * oy + ox;
3539 	/*because of U and V downsampling by 2x2, working with odd Y offset will lead to a half-line shift between Y and UV components. We
3540 	therefore force an even Y offset for U and V planes.*/
3541 	pU = pU + (vs_src->pitch_y * (oy / 2) + ox) / 2;
3542 	pV = pV + (vs_src->pitch_y * (oy / 2) + ox) / 2;
3543 
3544 
3545 	if (is_planar_yuv(vs_dst->pixel_format)) {
3546 		/*complete source copy*/
3547 		if ((vs_dst->pitch_y == (s32)vs_src->pitch_y) && (w == vs_src->width) && (h == vs_src->height)) {
3548 			assert(!ox);
3549 			assert(!oy);
3550 			memcpy(vs_dst->video_buffer, pY, sizeof(u8)*w*h);
3551 			if (vs_dst->pixel_format == GF_PIXEL_YUV) {
3552 				memcpy(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height, pV, sizeof(u8)*w*h/ 4);
3553 				memcpy(vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4, pU, sizeof(u8)*w*h/ 4);
3554 			}
3555 			else {
3556 				memcpy(vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height, pU, sizeof(u8)*w*h / 4);
3557 				memcpy(vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4, pV, sizeof(u8)*w*h/ 4);
3558 			}
3559 		} else {
3560 			u32 i;
3561 			u8 *dst, *src, *dst2, *src2, *dst3, *src3;
3562 
3563 			src = pY;
3564 			dst = (u8*)vs_dst->video_buffer;
3565 
3566 			src2 = (vs_dst->pixel_format != GF_PIXEL_YUV) ? pU : pV;
3567 			dst2 = (u8*)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height;
3568 			src3 = (vs_dst->pixel_format != GF_PIXEL_YUV) ? pV : pU;
3569 			dst3 = (u8*)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4;
3570 			for (i = 0; i<h; i++) {
3571 				memcpy(dst, src, w);
3572 				src += vs_src->pitch_y;
3573 				dst += vs_dst->pitch_y;
3574 				if (i<h / 2) {
3575 					memcpy(dst2, src2, w / 2);
3576 					src2 += vs_src->pitch_y/ 2;
3577 					dst2 += vs_dst->pitch_y / 2;
3578 					memcpy(dst3, src3, w / 2);
3579 					src3 += vs_src->pitch_y / 2;
3580 					dst3 += vs_dst->pitch_y / 2;
3581 				}
3582 			}
3583 		}
3584 	}
3585 	else if (vs_dst->pixel_format == GF_PIXEL_UYVY) {
3586 		u32 i, j;
3587 		for (i = 0; i<h; i++) {
3588 			u8 *dst, *y, *u, *v;
3589 			y = pY + i*vs_src->pitch_y;
3590 			u = pU + (i / 2) * vs_src->pitch_y  / 2;
3591 			v = pV + (i / 2) * vs_src->pitch_y / 2;
3592 			dst = (u8 *)vs_dst->video_buffer + i*vs_dst->pitch_y;
3593 
3594 			for (j = 0; j<w / 2; j++) {
3595 				*dst = *u;
3596 				dst++;
3597 				u++;
3598 				*dst = *y;
3599 				dst++;
3600 				y++;
3601 				*dst = *v;
3602 				dst++;
3603 				v++;
3604 				*dst = *y;
3605 				dst++;
3606 				y++;
3607 			}
3608 		}
3609 	}
3610 	else if (vs_dst->pixel_format == GF_PIXEL_VYUY) {
3611 		u32 i, j;
3612 		for (i = 0; i<h; i++) {
3613 			u8 *dst, *y, *u, *v;
3614 			y = pY + i*vs_src->pitch_y;
3615 			u = pU + (i / 2) * vs_src->pitch_y / 2;
3616 			v = pV + (i / 2) * vs_src->pitch_y / 2;
3617 			dst = (u8 *)vs_dst->video_buffer + i*vs_dst->pitch_y;
3618 
3619 			for (j = 0; j<w / 2; j++) {
3620 				*dst = *v;
3621 				dst++;
3622 				v++;
3623 				*dst = *y;
3624 				dst++;
3625 				y++;
3626 				*dst = *u;
3627 				dst++;
3628 				u++;
3629 				*dst = *y;
3630 				dst++;
3631 				y++;
3632 			}
3633 		}
3634 	}
3635 	else if (vs_dst->pixel_format == GF_PIXEL_YUYV) {
3636 		u32 i, j;
3637 		for (i = 0; i<h; i++) {
3638 			u8 *dst, *y, *u, *v;
3639 			y = pY + i*vs_src->pitch_y;
3640 			u = pU + (i / 2) * vs_src->pitch_y / 2;
3641 			v = pV + (i / 2) * vs_src->pitch_y / 2;
3642 			dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
3643 
3644 			for (j = 0; j<w / 2; j++) {
3645 				*dst = *y;
3646 				dst++;
3647 				y++;
3648 				*dst = *u;
3649 				dst++;
3650 				u++;
3651 				*dst = *y;
3652 				dst++;
3653 				y++;
3654 				*dst = *v;
3655 				dst++;
3656 				v++;
3657 			}
3658 		}
3659 	}
3660 	else if (vs_dst->pixel_format == GF_PIXEL_YVYU) {
3661 		u32 i, j;
3662 		for (i = 0; i<h; i++) {
3663 			u8 *dst, *y, *u, *v;
3664 			y = pY + i*vs_src->pitch_y;
3665 			u = pU + (i / 2) * vs_src->pitch_y / 2;
3666 			v = pV + (i / 2) * vs_src->pitch_y / 2;
3667 			dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
3668 
3669 			for (j = 0; j<w / 2; j++) {
3670 				*dst = *y;
3671 				dst++;
3672 				y++;
3673 				*dst = *v;
3674 				dst++;
3675 				v++;
3676 				*dst = *y;
3677 				dst++;
3678 				y++;
3679 				*dst = *u;
3680 				dst++;
3681 				u++;
3682 			}
3683 		}
3684 	}
3685 	else {
3686 		return GF_NOT_SUPPORTED;
3687 	}
3688 	return GF_OK;
3689 }
3690 
color_write_yuv422_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3691 static GF_Err color_write_yuv422_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3692 {
3693 	u32 w, h, ox, oy;
3694 	u8 *pY = vs_src->video_buffer;
3695 	u8 *pU = vs_src->u_ptr;
3696 	u8 *pV = vs_src->v_ptr;
3697 
3698 	if (_src_wnd) {
3699 		w = _src_wnd->w;
3700 		h = _src_wnd->h;
3701 		ox = _src_wnd->x;
3702 		oy = _src_wnd->y;
3703 	}
3704 	else {
3705 		w = vs_src->width;
3706 		h = vs_src->height;
3707 		ox = oy = 0;
3708 	}
3709 
3710 	if (!pU) {
3711 		pU = pY + vs_src->pitch_y * vs_src->height;
3712 		pV = pY + 3 * vs_src->pitch_y * vs_src->height / 2;
3713 	}
3714 
3715 
3716 	pY = pY + vs_src->pitch_y * oy + ox;
3717 	pU = pU + (vs_src->pitch_y * oy + ox) / 2;
3718 	pV = pV + (vs_src->pitch_y * oy + ox) / 2;
3719 
3720 
3721 	if (is_planar_yuv(vs_dst->pixel_format)) {
3722 		/*complete source copy*/
3723 		u32 i;
3724 		u8 *dst, *src, *dst2, *src2, *dst3, *src3, *_src2, *_src3;
3725 
3726 		src = pY;
3727 		_src2 = (vs_dst->pixel_format != GF_PIXEL_YUV) ? pU : pV;
3728 		_src3 = (vs_dst->pixel_format != GF_PIXEL_YUV) ? pV : pU;
3729 		dst = (u8*)vs_dst->video_buffer;
3730 		dst2 = (u8*)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height;
3731 		dst3 = (u8*)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4;
3732 		for (i = 0; i<h; i++) {
3733 			memcpy(dst, src, w);
3734 			src += vs_src->pitch_y;
3735 			dst += vs_dst->pitch_y;
3736 			if (i < h / 2) {
3737 				src2 = _src2 + i*vs_src->pitch_y;
3738 				src3 = _src3 + i*vs_src->pitch_y;
3739 				memcpy(dst2, src2, w / 2);
3740 				memcpy(dst3, src3, w / 2);
3741 				dst2 += vs_dst->pitch_y / 2;
3742 				dst3 += vs_dst->pitch_y / 2;
3743 			}
3744 		}
3745 		return GF_OK;
3746 	}
3747 	return GF_NOT_SUPPORTED;
3748 }
3749 
3750 
color_write_yuv444_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3751 static GF_Err color_write_yuv444_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3752 {
3753 	u32 w, h, ox, oy;
3754 	u8 *pY = vs_src->video_buffer;
3755 	u8 *pU = vs_src->u_ptr;
3756 	u8 *pV = vs_src->v_ptr;
3757 
3758 	if (_src_wnd) {
3759 		w = _src_wnd->w;
3760 		h = _src_wnd->h;
3761 		ox = _src_wnd->x;
3762 		oy = _src_wnd->y;
3763 	}
3764 	else {
3765 		w = vs_src->width;
3766 		h = vs_src->height;
3767 		ox = oy = 0;
3768 	}
3769 
3770 	if (!pU) {
3771 		pU = pY + vs_src->pitch_y* vs_src->height;
3772 		pV = pY + 2 * vs_src->pitch_y * vs_src->height;
3773 	}
3774 
3775 	pY = pY + vs_src->pitch_y * oy + ox;
3776 	pU = pU + vs_src->pitch_y * oy + ox;
3777 	pV = pV + vs_src->pitch_y * oy + ox;
3778 
3779 	if (is_planar_yuv(vs_dst->pixel_format)) {
3780 		/*complete source copy*/
3781 		u32 i, j;
3782 		u8 *dst, *src, *_src2, *_src3;
3783 
3784 		src = pY;
3785 		_src2 = (vs_dst->pixel_format != GF_PIXEL_YUV) ? pU : pV;
3786 		_src3 = (vs_dst->pixel_format != GF_PIXEL_YUV) ? pV : pU;
3787 		dst = (u8*)vs_dst->video_buffer;
3788 
3789 		for (i = 0; i<h; i++) {
3790 			memcpy(dst, src, w);
3791 			src += vs_src->pitch_y;
3792 			dst += vs_dst->pitch_y;
3793 
3794 		}
3795 		for (i = 0; i < h / 2; i++) {
3796 			u8 *dst2, *src2, *dst3, *src3;
3797 			src2 = _src2 + 2 * i*vs_src->pitch_y;
3798 			dst2 = (u8*)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height + i* vs_dst->pitch_y / 2;
3799 			src3 = _src3 + 2 * i*vs_src->pitch_y;
3800 			dst3 = (u8*)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4 + i* vs_dst->pitch_y / 2;
3801 			for (j = 0; j<w / 2; j++) {
3802 				*dst2 = *src2;
3803 				dst2++;
3804 				src2 += 2;
3805 
3806 				*dst3 = *src3;
3807 				dst3++;
3808 				src3 += 2;
3809 			}
3810 		}
3811 		return GF_OK;
3812 	}
3813 	return GF_NOT_SUPPORTED;
3814 }
3815 
color_write_yvyu_to_yuv(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd,Bool swap_uv)3816 static GF_Err color_write_yvyu_to_yuv(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd, Bool swap_uv)
3817 {
3818 	u32 i, j;
3819 	u32 w, h, ox, oy;
3820 	u8 *pY, *pU, *pV;
3821 
3822 	if (_src_wnd) {
3823 		w = _src_wnd->w;
3824 		h = _src_wnd->h;
3825 		ox = _src_wnd->x;
3826 		oy = _src_wnd->y;
3827 	}
3828 	else {
3829 		w = vs_src->width;
3830 		h = vs_src->height;
3831 		ox = oy = 0;
3832 	}
3833 
3834 	switch (vs_src->pixel_format) {
3835 	case GF_PIXEL_UYVY:
3836 		pU = vs_src->video_buffer + vs_src->pitch_y* oy + ox;
3837 		pY = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 1;
3838 		pV = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 3;
3839 		break;
3840 	case GF_PIXEL_YUYV:
3841 		pY = vs_src->video_buffer + vs_src->pitch_y * oy + ox;
3842 		pU = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 1;
3843 		pV = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 3;
3844 		break;
3845 	case GF_PIXEL_YVYU:
3846 		pY = vs_src->video_buffer + vs_src->pitch_y * oy + ox;
3847 		pV = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 1;
3848 		pU = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 3;
3849 		break;
3850 	case GF_PIXEL_VYUY:
3851 		pV = vs_src->video_buffer + vs_src->pitch_y* oy + ox;
3852 		pY = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 1;
3853 		pU = vs_src->video_buffer + vs_src->pitch_y * oy + ox + 3;
3854 		break;
3855 	default:
3856 		return GF_NOT_SUPPORTED;
3857 	}
3858 
3859 	if (is_planar_yuv(vs_dst->pixel_format)) {
3860 		u8 *dst_y, *dst_u, *dst_v;
3861 
3862 		dst_y = (u8*)vs_dst->video_buffer;
3863 		if (vs_dst->pixel_format == GF_PIXEL_YUV) {
3864 			dst_v = (u8*)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height;
3865 			dst_u = (u8*)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4;
3866 		}
3867 		else {
3868 			dst_u = (u8*)vs_dst->video_buffer + vs_dst->pitch_y * vs_dst->height;
3869 			dst_v = (u8*)vs_dst->video_buffer + 5 * vs_dst->pitch_y * vs_dst->height / 4;
3870 		}
3871 		for (i = 0; i<h; i++) {
3872 			for (j = 0; j<w; j += 2) {
3873 				*dst_y = *pY;
3874 				*(dst_y + 1) = *(pY + 2);
3875 				dst_y += 2;
3876 				pY += 4;
3877 				if (i % 2) continue;
3878 
3879 				*dst_u = (*pU + *(pU + vs_src->pitch_y)) / 2;
3880 				*dst_v = (*pV + *(pV + vs_src->pitch_y)) / 2;
3881 				dst_u++;
3882 				dst_v++;
3883 				pU += 4;
3884 				pV += 4;
3885 			}
3886 			if (i % 2) {
3887 				pU += vs_src->pitch_y;
3888 				pV += vs_src->pitch_y;
3889 			}
3890 		}
3891 		return GF_OK;
3892 	}
3893 
3894 	if (vs_src->pixel_format == vs_dst->pixel_format) {
3895 		for (i = 0; i<h; i++) {
3896 			char *dst = vs_dst->video_buffer + i*vs_dst->pitch_y;
3897 			pY = vs_src->video_buffer + vs_src->pitch_y * (i + oy) + ox;
3898 			memcpy(dst, pY, sizeof(char) * 2 * w);
3899 		}
3900 		return GF_OK;
3901 	}
3902 
3903 	for (i = 0; i<h; i++) {
3904 		u8 *dst = vs_dst->video_buffer + i*vs_dst->pitch_y;
3905 		u8 *y = pY + vs_src->pitch_y * i;
3906 		u8 *u = pU + vs_src->pitch_y * i;
3907 		u8 *v = pV + vs_src->pitch_y * i;
3908 		switch (vs_dst->pixel_format) {
3909 		case GF_PIXEL_UYVY:
3910 			for (j = 0; j<w; j += 2) {
3911 				dst[0] = *u;
3912 				dst[1] = *y;
3913 				dst[2] = *v;
3914 				dst[3] = *(y + 2);
3915 				dst += 4;
3916 				y += 4;
3917 				u += 4;
3918 				v += 4;
3919 			}
3920 			break;
3921 		case GF_PIXEL_YVYU:
3922 			for (j = 0; j<w; j += 2) {
3923 				dst[0] = *y;
3924 				dst[1] = *v;
3925 				dst[2] = *(y + 2);
3926 				dst[3] = *u;
3927 				dst += 4;
3928 				y += 4;
3929 				u += 4;
3930 				v += 4;
3931 			}
3932 			break;
3933 		case GF_PIXEL_YUYV:
3934 			for (j = 0; j<w; j += 2) {
3935 				dst[0] = *y;
3936 				dst[1] = *u;
3937 				dst[2] = *(y + 2);
3938 				dst[3] = *v;
3939 				dst += 4;
3940 				y += 4;
3941 				u += 4;
3942 				v += 4;
3943 			}
3944 			break;
3945 		case GF_PIXEL_VYUY:
3946 			for (j = 0; j<w; j += 2) {
3947 				dst[0] = *v;
3948 				dst[1] = *y;
3949 				dst[2] = *u;
3950 				dst[3] = *(y + 2);
3951 				dst += 4;
3952 				y += 4;
3953 				u += 4;
3954 				v += 4;
3955 			}
3956 			break;
3957 		default:
3958 			return GF_NOT_SUPPORTED;
3959 		}
3960 	}
3961 	return GF_OK;
3962 }
3963 
3964 
/*
 * Returns the number of bytes used by one pixel of the packed RGB pixel format pf,
 * or 0 when pf is not one of the formats listed below.
 */
u32 get_bpp(u32 pf)
{
	u32 bytes_per_pixel;

	switch (pf) {
	/*16-bit formats*/
	case GF_PIXEL_RGB_555:
	case GF_PIXEL_RGB_565:
		bytes_per_pixel = 2;
		break;
	/*24-bit formats*/
	case GF_PIXEL_RGB:
	case GF_PIXEL_RGBS:
	case GF_PIXEL_BGR:
		bytes_per_pixel = 3;
		break;
	/*32-bit formats*/
	case GF_PIXEL_RGBX:
	case GF_PIXEL_BGRX:
	case GF_PIXEL_XRGB:
	case GF_PIXEL_XBGR:
	case GF_PIXEL_ARGB:
	case GF_PIXEL_RGBAS:
	case GF_PIXEL_RGBD:
	case GF_PIXEL_RGBDS:
		bytes_per_pixel = 4;
		break;
	default:
		bytes_per_pixel = 0;
		break;
	}
	return bytes_per_pixel;
}
3987 
color_write_rgb_to_24(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd)3988 static GF_Err color_write_rgb_to_24(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd)
3989 {
3990 	u32 i;
3991 	u32 w, h, ox, oy;
3992 	u8 *src;
3993 	u32 BPP;
3994 
3995 	if (vs_src->pixel_format != vs_dst->pixel_format) return GF_NOT_SUPPORTED;
3996 	BPP = get_bpp(vs_src->pixel_format);
3997 	if (!BPP) return GF_NOT_SUPPORTED;
3998 
3999 	if (_src_wnd) {
4000 		w = _src_wnd->w;
4001 		h = _src_wnd->h;
4002 		ox = _src_wnd->x;
4003 		oy = _src_wnd->y;
4004 	} else {
4005 		w = vs_src->width;
4006 		h = vs_src->height;
4007 		ox = oy = 0;
4008 	}
4009 
4010 	/*go to start of src*/
4011 	src = vs_src->video_buffer + vs_src->pitch_x * oy + BPP * ox;
4012 
4013 	for (i = 0; i<h; i++) {
4014 		memcpy(vs_dst->video_buffer + i*vs_dst->pitch_y, src, sizeof(u8) * BPP * w);
4015 		src += vs_src->pitch_y;
4016 	}
4017 	return GF_OK;
4018 }
4019 
4020 
color_write_rgb_to_32(GF_VideoSurface * vs_dst,GF_VideoSurface * vs_src,GF_Window * _src_wnd)4021 static GF_Err color_write_rgb_to_32(GF_VideoSurface *vs_dst, GF_VideoSurface *vs_src, GF_Window *_src_wnd)
4022 {
4023 	u32 i, j, w, h, ox, oy;
4024 	u8 *src;
4025 	Bool isBGR;
4026 	u8 *dst, *cur;
4027 	u32 BPP = get_bpp(vs_src->pixel_format);
4028 	if (!BPP) return GF_NOT_SUPPORTED;
4029 
4030 	if (_src_wnd) {
4031 		w = _src_wnd->w;
4032 		h = _src_wnd->h;
4033 		ox = _src_wnd->x;
4034 		oy = _src_wnd->y;
4035 	}
4036 	else {
4037 		w = vs_src->width;
4038 		h = vs_src->height;
4039 		ox = oy = 0;
4040 	}
4041 
4042 	/*go to start of src*/
4043 	src = vs_src->video_buffer + vs_src->pitch_y * oy + BPP * ox;
4044 
4045 	if (vs_src->pixel_format == vs_dst->pixel_format) {
4046 		for (i = 0; i<h; i++) {
4047 			memcpy(vs_dst->video_buffer + i*vs_dst->pitch_y, src, sizeof(u8) * BPP * w);
4048 		}
4049 		return GF_OK;
4050 	}
4051 	/*get all pixels*/
4052 	isBGR = (vs_dst->pixel_format == GF_PIXEL_BGRX) ? GF_TRUE : GF_FALSE;
4053 	if (isBGR) {
4054 		switch (vs_src->pixel_format) {
4055 		case GF_PIXEL_RGB:
4056 		case GF_PIXEL_RGBS:
4057 			for (i = 0; i<h; i++) {
4058 				dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
4059 				cur = src + i*vs_src->pitch_y;
4060 				for (j = 0; j<w; j++) {
4061 					dst[0] = *cur++;
4062 					dst[1] = *cur++;
4063 					dst[2] = *cur++;
4064 					dst += 4;
4065 				}
4066 			}
4067 			break;
4068 		case GF_PIXEL_RGBDS:
4069 		case GF_PIXEL_RGBD:
4070 			for (i = 0; i<h; i++) {
4071 				dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
4072 				cur = src + i*vs_src->pitch_y;
4073 				for (j = 0; j<w; j++) {
4074 					dst[0] = *cur++;
4075 					dst[1] = *cur++;
4076 					dst[2] = *cur++;
4077 					cur++;
4078 					dst += 4;
4079 				}
4080 			}
4081 			break;
4082 		case GF_PIXEL_BGR:
4083 			for (i = 0; i<h; i++) {
4084 				dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
4085 				cur = src + i*vs_src->pitch_y;
4086 				for (j = 0; j<w; j++) {
4087 					dst[2] = *cur++;
4088 					dst[1] = *cur++;
4089 					dst[0] = *cur++;
4090 					dst += 4;
4091 				}
4092 			}
4093 			break;
4094 		default:
4095 			return GF_NOT_SUPPORTED;
4096 		}
4097 	}
4098 	else {
4099 		switch (vs_src->pixel_format) {
4100 		case GF_PIXEL_RGB:
4101 		case GF_PIXEL_RGBS:
4102 			for (i = 0; i<h; i++) {
4103 				dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
4104 				cur = src + i*vs_src->pitch_y;
4105 				for (j = 0; j<w; j++) {
4106 					dst[2] = *cur++;
4107 					dst[1] = *cur++;
4108 					dst[0] = *cur++;
4109 					dst += 4;
4110 				}
4111 			}
4112 			break;
4113 		case GF_PIXEL_RGBD:
4114 		case GF_PIXEL_RGBDS:
4115 			for (i = 0; i<h; i++) {
4116 				dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
4117 				cur = src + i*vs_src->pitch_y;
4118 				for (j = 0; j<w; j++) {
4119 					dst[2] = *cur++;
4120 					dst[1] = *cur++;
4121 					dst[0] = *cur++;
4122 					cur++;
4123 					dst += 4;
4124 				}
4125 			}
4126 			break;
4127 		case GF_PIXEL_BGR:
4128 			for (i = 0; i<h; i++) {
4129 				dst = (u8*)vs_dst->video_buffer + i*vs_dst->pitch_y;
4130 				cur = src + i*vs_src->pitch_y;
4131 				for (j = 0; j<w; j++) {
4132 					dst[0] = *cur++;
4133 					dst[1] = *cur++;
4134 					dst[2] = *cur++;
4135 					dst += 4;
4136 				}
4137 			}
4138 			break;
4139 		default:
4140 			return GF_NOT_SUPPORTED;
4141 		}
4142 	}
4143 	return GF_OK;
4144 }
4145 
4146 #endif
4147 
4148 
4149 /* Basic SVG datatype parsing functions */
4150 static const struct predef_col {
4151 	const char *name;
4152 	u8 r;
4153 	u8 g;
4154 	u8 b;
4155 } predefined_colors[] =
4156 {
4157 	{"aliceblue",240, 248, 255},
4158 	{"antiquewhite",250, 235, 215},
4159 	{"aquamarine",127, 255, 212},
4160 	{"azure",240, 255, 255},
4161 	{"beige",245, 245, 220},
4162 	{"bisque",255, 228, 196},
4163 	{"black", 0, 0, 0},
4164 	{"blanchedalmond",255, 235, 205},
4165 	{"blue", 0, 0, 255},
4166 	{"blueviolet",138, 43, 226},
4167 	{"brown",165, 42, 42},
4168 	{"burlywood",222, 184, 135},
4169 	{"cadetblue", 95, 158, 160},
4170 	{"chartreuse",127, 255, 0},
4171 	{"chocolate",210, 105, 30},
4172 	{"coral",255, 127, 80},
4173 	{"lightpink",255, 182, 193},
4174 	{"lightsalmon",255, 160, 122},
4175 	{"lightseagreen", 32, 178, 170},
4176 	{"lightskyblue",135, 206, 250},
4177 	{"lightslategray",119, 136, 153},
4178 	{"lightslategrey",119, 136, 153},
4179 	{"lightsteelblue",176, 196, 222},
4180 	{"lightyellow",255, 255, 224},
4181 	{"lime", 0, 255, 0},
4182 	{"limegreen", 50, 205, 50},
4183 	{"linen",250, 240, 230},
4184 	{"magenta",255, 0, 255},
4185 	{"maroon",128, 0, 0},
4186 	{"mediumaquamarine",102, 205, 170},
4187 	{"mediumblue", 0, 0, 205},
4188 	{"mediumorchid",186, 85, 211},
4189 	{"cornflowerblue",100, 149, 237},
4190 	{"cornsilk",255, 248, 220},
4191 	{"crimson",220, 20, 60},
4192 	{"cyan", 0, 255, 255},
4193 	{"darkblue", 0, 0, 139},
4194 	{"darkcyan", 0, 139, 139},
4195 	{"darkgoldenrod",184, 134, 11},
4196 	{"darkgray",169, 169, 169},
4197 	{"darkgreen", 0, 100, 0},
4198 	{"darkgrey",169, 169, 169},
4199 	{"darkkhaki",189, 183, 107},
4200 	{"darkmagenta",139, 0, 139},
4201 	{"darkolivegreen", 85, 107, 47},
4202 	{"darkorange",255, 140, 0},
4203 	{"darkorchid",153, 50, 204},
4204 	{"darkred",139, 0, 0},
4205 	{"darksalmon",233, 150, 122},
4206 	{"darkseagreen",143, 188, 143},
4207 	{"darkslateblue", 72, 61, 139},
4208 	{"darkslategray", 47, 79, 79},
4209 	{"darkslategrey", 47, 79, 79},
4210 	{"darkturquoise", 0, 206, 209},
4211 	{"darkviolet",148, 0, 211},
4212 	{"deeppink",255, 20, 147},
4213 	{"deepskyblue", 0, 191, 255},
4214 	{"dimgray",105, 105, 105},
4215 	{"dimgrey",105, 105, 105},
4216 	{"dodgerblue", 30, 144, 255},
4217 	{"firebrick",178, 34, 34},
4218 	{"floralwhite",255, 250, 240},
4219 	{"forestgreen", 34, 139, 34},
4220 	{"fuchsia",255, 0, 255},
4221 	{"gainsboro",220, 220, 220},
4222 	{"ghostwhite",248, 248, 255},
4223 	{"gold",255, 215, 0},
4224 	{"goldenrod",218, 165, 32},
4225 	{"gray",128, 128, 128},
4226 	{"grey",128, 128, 128},
4227 	{"green", 0, 128, 0},
4228 	{"greenyellow",173, 255, 47},
4229 	{"honeydew",240, 255, 240},
4230 	{"hotpink",255, 105, 180},
4231 	{"indianred",205, 92, 92},
4232 	{"indigo", 75, 0, 130},
4233 	{"ivory",255, 255, 240},
4234 	{"khaki",240, 230, 140},
4235 	{"lavender",230, 230, 25},
4236 	{"lavenderblush",255, 240, 245},
4237 	{"mediumpurple",147, 112, 219},
4238 	{"mediumseagreen", 60, 179, 113},
4239 	{"mediumslateblue",123, 104, 238},
4240 	{"mediumspringgreen", 0, 250, 154},
4241 	{"mediumturquoise", 72, 209, 204},
4242 	{"mediumvioletred",199, 21, 133},
4243 	{"midnightblue", 25, 25, 112},
4244 	{"mintcream",245, 255, 250},
4245 	{"mistyrose",255, 228, 225},
4246 	{"moccasin",255, 228, 181},
4247 	{"navajowhite",255, 222, 173},
4248 	{"navy", 0, 0, 128},
4249 	{"oldlace",253, 245, 230},
4250 	{"olive",128, 128, 0},
4251 	{"olivedrab",107, 142, 35},
4252 	{"orange",255, 165, 0},
4253 	{"orangered",255, 69, 0},
4254 	{"orchid",218, 112, 214},
4255 	{"palegoldenrod",238, 232, 170},
4256 	{"palegreen",152, 251, 152},
4257 	{"paleturquoise",175, 238, 238},
4258 	{"palevioletred",219, 112, 147},
4259 	{"papayawhip",255, 239, 213},
4260 	{"peachpuff",255, 218, 185},
4261 	{"peru",205, 133, 63},
4262 	{"pink",255, 192, 203},
4263 	{"plum",221, 160, 221},
4264 	{"powderblue",176, 224, 230},
4265 	{"purple",128, 0, 128},
4266 	{"red",255, 0, 0},
4267 	{"rosybrown",188, 143, 143},
4268 	{"royalblue", 65, 105, 225},
4269 	{"saddlebrown",139, 69, 19},
4270 	{"salmon",250, 128, 114},
4271 	{"sandybrown",244, 164, 96},
4272 	{"seagreen", 46, 139, 87},
4273 	{"seashell",255, 245, 238},
4274 	{"sienna",160, 82, 45},
4275 	{"silver",192, 192, 192},
4276 	{"skyblue",135, 206, 235},
4277 	{"slateblue",106, 90, 205},
4278 	{"slategray",112, 128, 144},
4279 	{"slategrey",112, 128, 144},
4280 	{"snow",255, 250, 250},
4281 	{"springgreen", 0, 255, 127},
4282 	{"steelblue", 70, 130, 180},
4283 	{"tan",210, 180, 140},
4284 	{"teal", 0, 128, 128},
4285 	{"lawngreen",124, 252, 0},
4286 	{"lemonchiffon",255, 250, 205},
4287 	{"lightblue",173, 216, 230},
4288 	{"lightcoral",240, 128, 128},
4289 	{"lightcyan",224, 255, 255},
4290 	{"lightgoldenrodyellow",250, 250, 210},
4291 	{"lightgray",211, 211, 211},
4292 	{"lightgreen",144, 238, 144},
4293 	{"lightgrey",211, 211, 211},
4294 	{"thistle",216, 191, 216},
4295 	{"tomato",255, 99, 71},
4296 	{"turquoise", 64, 224, 208},
4297 	{"violet",238, 130, 238},
4298 	{"wheat",245, 222, 179},
4299 	{"white",255, 255, 255},
4300 	{"whitesmoke",245, 245, 245},
4301 	{"yellow",255, 255, 0},
4302 	{"yellowgreen",154, 205, 50},
4303 	{"aqua", 0, 255, 255},
4304 
4305 };
4306 
4307 
4308 GF_EXPORT
gf_color_parse(const char * name)4309 GF_Color gf_color_parse(const char *name)
4310 {
4311 	u32 i, count;
4312 	u32 res;
4313 	if ((name[0]=='$') || (name[0]=='#')) {
4314 		sscanf(name+1, "%x", &res);
4315 		return res | 0xFF000000;
4316 	}
4317 	if (!strnicmp(name, "0x", 2) ) {
4318 		sscanf(name+2, "%x", &res);
4319 		return res | 0xFF000000;
4320 	}
4321 
4322 	count = sizeof(predefined_colors) / sizeof(struct predef_col);
4323 	for (i=0; i<count; i++) {
4324 		if (!strcmp(name, predefined_colors[i].name)) {
4325 			res = GF_COL_ARGB(0xFF, predefined_colors[i].r, predefined_colors[i].g, predefined_colors[i].b);
4326 			return res;
4327 		}
4328 	}
4329 
4330 	return 0;
4331 
4332 }
4333 
4334 GF_EXPORT
gf_color_get_name(GF_Color col)4335 const char *gf_color_get_name(GF_Color col)
4336 {
4337 	u32 i, count;
4338 	u8 r, g, b;
4339 
4340 	r = GF_COL_R(col);
4341 	g = GF_COL_G(col);
4342 	b = GF_COL_B(col);
4343 
4344 	count = sizeof(predefined_colors) / sizeof(struct predef_col);
4345 	for (i=0; i<count; i++) {
4346 		if (predefined_colors[i].r != r) continue;
4347 		if (predefined_colors[i].g != g) continue;
4348 		if (predefined_colors[i].b != b) continue;
4349 		return predefined_colors[i].name;
4350 	}
4351 	return NULL;
4352 
4353 }
4354