1 /*
2  *
3  *  This file is part of libmpeg3
4  *
5  * LibMPEG3
6  * Author: Adam Williams <broadcast@earthling.net>
7  * Page: heroine.linuxbox.com
8  * Page: http://www.smalltalkconsulting.com/html/mpeg3source.html (for Squeak)
9  *
10     LibMPEG3 was originally licenced under GPL. It was relicensed by
11     the author under the LGPL and the Squeak license on Nov 1st, 2000
12 
13     This library is free software; you can redistribute it and/or
14     modify it under the terms of the GNU Lesser General Public
15     License as published by the Free Software Foundation; either
16     version 2.1 of the License, or (at your option) any later version.
17 
18     This library is distributed in the hope that it will be useful,
19     but WITHOUT ANY WARRANTY; without even the implied warranty of
20     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21     Lesser General Public License for more details.
22 
23     You should have received a copy of the GNU Lesser General Public
24     License along with this library; if not, write to the Free Software
25     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
26 
27     Also licensed under the Squeak license.
28     http://www.squeak.org/license.html
29  */
30   /*  Changed Sept 15th by John M McIntosh to support Macintosh & Squeak
31       Feb/march 2001, JMM  tuning for the mac
32       I've coded up mpeg3video_ditherframeFastRGB555 and mpeg3video_ditherframeFastRGBA which
33       do specialized 16 and 32 bit Crb to rgb mapping (Intel should do this too?)
34       I also coded up clip arrays versus using CLIP() This avoid test/branchs which slows things down
35  	  May 19th, 2003, Ivo Roessling <ivo@wettinet.de> changes to asm syntax to enable compile in GCC 3.3.x
36 	  March 17th, 2006, John M McIntosh, set alpha to 0xff versus 0x00.
37  */
38 #include "libmpeg3.h"
39 #include "mpeg3video.h"
40 #include <string.h>
41 
/*
 * Clipping lookup tables: one 8-bit table and one 16-bit table per colour
 * channel, each holding 1026 entries.  Their contents are filled in outside
 * this section.
 */
unsigned char gClipArray[1026];
unsigned short gClipArray16r[1026];
unsigned short gClipArray16g[1026];
unsigned short gClipArray16b[1026];

/*
 * Each *_ptr addresses element 512 of its table, so a lookup through the
 * pointer accepts indices from -512 up to 513 without leaving the array.
 */
unsigned char *gClipArray_ptr = gClipArray + 512;
unsigned short *gClipArray_ptr16r = gClipArray16r + 512;
unsigned short *gClipArray_ptr16g = gClipArray16g + 512;
unsigned short *gClipArray_ptr16b = gClipArray16b + 512;

/* Starts out as 1; the code that reads this flag is outside this section. */
int doClippingArrays = 1;

/* Luma remapping table indexed by the raw Y sample in the *_601_* macros. */
static unsigned char mpeg3_601_to_rgb[256];
51 
52 #ifdef HAVE_MMX
53 
54 #warning Beware of alpha this is not in the mmx code, and it appears tweak really wants it now
55 
/*
 * 64-bit constant operands for the MMX code in mpeg3video_rgb16_mmx().
 * Every table is exactly 8 bytes: four identical 16-bit lanes.
 *
 * - The two-element tables use `unsigned int` rather than `unsigned long`,
 *   so they stay 8 bytes (and all four lanes stay populated) on targets
 *   where `long` is 64 bits wide.
 * - The assembly refers to these objects by symbol name only, which the
 *   compiler cannot see; `used` keeps them from being discarded as
 *   unreferenced statics.
 * - The coefficients are consumed by pmullw and the sums are shifted right
 *   by 6 in the assembly, i.e. they are fixed-point values scaled by 64.
 */
static __attribute__((used)) long long mpeg3_MMX_0 = 0L;
static __attribute__((used)) unsigned int mpeg3_MMX_10w[]         = {0x00100010, 0x00100010};         /* 4 x 0x0010 */
static __attribute__((used)) unsigned int mpeg3_MMX_80w[]         = {0x00800080, 0x00800080};         /* 4 x 0x0080: chroma bias (128) */

static __attribute__((used)) unsigned int mpeg3_MMX_00FFw[]       = {0x00ff00ff, 0x00ff00ff};         /* 4 x 0x00ff: keeps the low byte of each word */

static __attribute__((used)) unsigned short mpeg3_MMX_Ublucoeff[] = {0x81, 0x81, 0x81, 0x81};         /* Cb -> blue,  129 */
static __attribute__((used)) unsigned short mpeg3_MMX_Vredcoeff[] = {0x66, 0x66, 0x66, 0x66};         /* Cr -> red,   102 */

static __attribute__((used)) unsigned short mpeg3_MMX_Ugrncoeff[] = {0xffe8, 0xffe8, 0xffe8, 0xffe8}; /* Cb -> green, -24 as a signed word */
static __attribute__((used)) unsigned short mpeg3_MMX_Vgrncoeff[] = {0xffcd, 0xffcd, 0xffcd, 0xffcd}; /* Cr -> green, -51 as a signed word */

static __attribute__((used)) unsigned short mpeg3_MMX_Ycoeff[]    = {0x4a, 0x4a, 0x4a, 0x4a};         /* luma gain, 74 */

static __attribute__((used)) unsigned short mpeg3_MMX_redmask[]   = {0xf800, 0xf800, 0xf800, 0xf800}; /* top 5 bits of each word */

static __attribute__((used)) unsigned short mpeg3_MMX_grnmask[]   = {0x7e0, 0x7e0, 0x7e0, 0x7e0};     /* bits 5..10 of each word */
73 
74 /* Algorithm */
75 /* 			r = (int)(*y + 1.371 * (*cr - 128)); */
76 /* 			g = (int)(*y - 0.698 * (*cr - 128) - 0.336 * (*cb - 128)); */
77 /* 			b = (int)(*y + 1.732 * (*cb - 128)); */
78 
/*
 * Convert a planar YCbCr picture to packed 16-bit pixels using MMX.
 *
 * lum, cr, cb  source planes; one chroma sample is shared by a 2x2 block of
 *              luma samples (cr/cb advance 4 bytes for every 8 luma bytes
 *              and are reused for the row below).
 * out          first output pixel; treated as an array of unsigned short.
 * rows, cols   picture size in pixels.  The loops are do-while shaped and
 *              handle 8 pixels on two rows per pass, so this assumes cols is
 *              a positive multiple of 8 and rows is a positive even number.
 * mod          number of output pixels to skip between the end of one output
 *              row and the start of the next.
 *
 * The template uses 32-bit `addl`/`leal` on the pointer operands and names
 * its constants with a leading underscore (_mpeg3_MMX_*), so it only
 * assembles for 32-bit x86 targets whose C symbols carry that prefix.
 * No `emms` is executed here.
 *
 * Constraint notes: the template increments %0, %1, %2 and %4 and rewrites
 * %6, so those operands must be read-write.  %3 and %5 are also declared
 * read-write purely to keep the original operand numbering (outputs are
 * numbered before inputs); the template never changes them.
 */
inline void mpeg3video_rgb16_mmx(unsigned char *lum,
			unsigned char *cr,
			unsigned char *cb,
			unsigned char *out,
			int rows,
			int cols,
			int mod)
{
	unsigned short *row1;
	int x;
	unsigned char *y;
	int col1;

	row1 = (unsigned short *)out;
	/* Output row stride in pixels; (%4,%5,2) addresses the row below. */
	col1 = cols + mod;
	/* Bytes to add to row1 after a row pair: 2 * (cols + 2 * mod). */
	mod += cols + mod;
	mod *= 2;
	/* End-of-plane sentinel compared against the luma pointer. */
	y = lum + cols * rows;
	x = 0;

	__asm__ __volatile__(
		"1:\n"
			"movd           (%1),                   %%mm0\n"  /* 4 Cb      0  0  0  0 u3 u2 u1 u0 */
			"pxor           %%mm7,                  %%mm7\n"
			"movd           (%0),                   %%mm1\n"  /* 4 Cr      0  0  0  0 v3 v2 v1 v0 */
			"punpcklbw      %%mm7,                  %%mm0\n"  /* 4 W cb    0 u3  0 u2  0 u1  0 u0 */
			"punpcklbw      %%mm7,                  %%mm1\n"  /* 4 W cr    0 v3  0 v2  0 v1  0 v0 */

			"psubw          _mpeg3_MMX_80w,          %%mm0\n" /* Cb - 128 */
			"psubw          _mpeg3_MMX_80w,          %%mm1\n" /* Cr - 128 */
			"movq           %%mm0,                  %%mm2\n"  /* Cb copy */
			"movq           %%mm1,                  %%mm3\n"  /* Cr copy */
			"pmullw         _mpeg3_MMX_Ugrncoeff,    %%mm2\n" /* Cb2green */
			"movq           (%2),                   %%mm6\n"  /* L1       L7 L6 L5 L4 L3 L2 L1 L0 */
			"pmullw         _mpeg3_MMX_Ublucoeff,    %%mm0\n" /* Cb2blue */
			"pand           _mpeg3_MMX_00FFw,        %%mm6\n" /* L1       00 L6 00 L4 00 L2 00 L0 */
			"pmullw         _mpeg3_MMX_Vgrncoeff,    %%mm3\n" /* Cr2green */
			"movq           (%2),                   %%mm7\n"  /* L2 */
			"pmullw         _mpeg3_MMX_Vredcoeff,    %%mm1\n" /* Cr2red */
			"psrlw          $8,                     %%mm7\n"  /* L2       00 L7 00 L5 00 L3 00 L1 */
			"pmullw         _mpeg3_MMX_Ycoeff,       %%mm6\n" /* lum1 */
			"paddw          %%mm3,                  %%mm2\n"  /* Cb2green + Cr2green == green */
			"pmullw         _mpeg3_MMX_Ycoeff,       %%mm7\n" /* lum2 */

			"movq           %%mm6,                  %%mm4\n"  /* lum1 */
			"paddw          %%mm0,                  %%mm6\n"  /* lum1 + blue */
			"movq           %%mm4,                  %%mm5\n"  /* lum1 */
			"paddw          %%mm1,                  %%mm4\n"  /* lum1 + red */
			"paddw          %%mm2,                  %%mm5\n"  /* lum1 + green */
			"psraw          $6,                     %%mm4\n"  /* R1 */
			"movq           %%mm7,                  %%mm3\n"  /* lum2 */
			"psraw          $6,                     %%mm5\n"  /* G1 */
			"paddw          %%mm0,                  %%mm7\n"  /* lum2 + blue */
			"psraw          $6,                     %%mm6\n"  /* B1 */
			"packuswb       %%mm4,                  %%mm4\n"  /* saturate to bytes */
			"packuswb       %%mm5,                  %%mm5\n"
			"packuswb       %%mm6,                  %%mm6\n"
			"punpcklbw      %%mm4,                  %%mm4\n"  /* duplicate each byte into a word */
			"punpcklbw      %%mm5,                  %%mm5\n"

			"pand           _mpeg3_MMX_redmask,      %%mm4\n"
			"psllw          $3,                     %%mm5\n"  /* GREEN 1 */
			"punpcklbw      %%mm6,                  %%mm6\n"
			"pand           _mpeg3_MMX_grnmask,      %%mm5\n"
			"pand           _mpeg3_MMX_redmask,      %%mm6\n"
			"por            %%mm5,                  %%mm4\n"
			"psrlw          $11,                    %%mm6\n"  /* BLUE 1 */
			"movq           %%mm3,                  %%mm5\n"  /* lum2 */
			"paddw          %%mm1,                  %%mm3\n"  /* lum2 + red */
			"paddw          %%mm2,                  %%mm5\n"  /* lum2 + green */
			"psraw          $6,                     %%mm3\n"  /* R2 */
			"por            %%mm6,                  %%mm4\n"  /* even pixels of the upper row */
			"psraw          $6,                     %%mm5\n"  /* G2 */
			"movq           (%2, %3),               %%mm6\n"  /* L3: luma of the row below */
			"psraw          $6,                     %%mm7\n"
			"packuswb       %%mm3,                  %%mm3\n"
			"packuswb       %%mm5,                  %%mm5\n"
			"packuswb       %%mm7,                  %%mm7\n"
			"pand           _mpeg3_MMX_00FFw,        %%mm6\n" /* L3 */
			"punpcklbw      %%mm3,                  %%mm3\n"
			"punpcklbw      %%mm5,                  %%mm5\n"
			"pmullw         _mpeg3_MMX_Ycoeff,       %%mm6\n" /* lum3 */
			"punpcklbw      %%mm7,                  %%mm7\n"
			"psllw          $3,                     %%mm5\n"  /* GREEN 2 */
			"pand           _mpeg3_MMX_redmask,      %%mm7\n"
			"pand           _mpeg3_MMX_redmask,      %%mm3\n"
			"psrlw          $11,                    %%mm7\n"  /* BLUE 2 */
			"pand           _mpeg3_MMX_grnmask,      %%mm5\n"
			"por            %%mm7,                  %%mm3\n"
			"movq           (%2,%3),                %%mm7\n"  /* L4 */
			"por            %%mm5,                  %%mm3\n"  /* odd pixels of the upper row */
			"psrlw          $8,                     %%mm7\n"  /* L4 */
			"movq           %%mm4,                  %%mm5\n"
			"punpcklwd      %%mm3,                  %%mm4\n"  /* interleave even/odd pixels */
			"pmullw         _mpeg3_MMX_Ycoeff,       %%mm7\n" /* lum4 */
			"punpckhwd      %%mm3,                  %%mm5\n"

			"movq           %%mm4,                  (%4)\n"   /* 8 pixels of the upper row */
			"movq           %%mm5,                  8(%4)\n"

			"movq           %%mm6,                  %%mm4\n"  /* lum3 */
			"paddw          %%mm0,                  %%mm6\n"  /* lum3 + blue */

			"movq           %%mm4,                  %%mm5\n"  /* lum3 */
			"paddw          %%mm1,                  %%mm4\n"  /* lum3 + red */
			"paddw          %%mm2,                  %%mm5\n"  /* lum3 + green */
			"psraw          $6,                     %%mm4\n"
			"movq           %%mm7,                  %%mm3\n"  /* lum4 */
			"psraw          $6,                     %%mm5\n"
			"paddw          %%mm0,                  %%mm7\n"  /* lum4 + blue */
			"psraw          $6,                     %%mm6\n"  /* lum3 + blue */
			"movq           %%mm3,                  %%mm0\n"  /* lum4 (mm0 is reloaded at 1:) */
			"packuswb       %%mm4,                  %%mm4\n"
			"paddw          %%mm1,                  %%mm3\n"  /* lum4 + red */
			"packuswb       %%mm5,                  %%mm5\n"
			"paddw          %%mm2,                  %%mm0\n"  /* lum4 + green */
			"packuswb       %%mm6,                  %%mm6\n"
			"punpcklbw      %%mm4,                  %%mm4\n"
			"punpcklbw      %%mm5,                  %%mm5\n"
			"punpcklbw      %%mm6,                  %%mm6\n"
			"psllw          $3,                     %%mm5\n"  /* GREEN 3 */
			"pand           _mpeg3_MMX_redmask,      %%mm4\n"
			"psraw          $6,                     %%mm3\n"
			"psraw          $6,                     %%mm0\n"
			"pand           _mpeg3_MMX_redmask,      %%mm6\n" /* BLUE */
			"pand           _mpeg3_MMX_grnmask,      %%mm5\n"
			"psrlw          $11,                    %%mm6\n"  /* BLUE 3 */
			"por            %%mm5,                  %%mm4\n"
			"psraw          $6,                     %%mm7\n"
			"por            %%mm6,                  %%mm4\n"
			"packuswb       %%mm3,                  %%mm3\n"
			"packuswb       %%mm0,                  %%mm0\n"
			"packuswb       %%mm7,                  %%mm7\n"
			"punpcklbw      %%mm3,                  %%mm3\n"
			"punpcklbw      %%mm0,                  %%mm0\n"
			"punpcklbw      %%mm7,                  %%mm7\n"
			"pand           _mpeg3_MMX_redmask,      %%mm3\n"
			"pand           _mpeg3_MMX_redmask,      %%mm7\n" /* BLUE */
			"psllw          $3,                     %%mm0\n"  /* GREEN 4 */
			"psrlw          $11,                    %%mm7\n"
			"pand           _mpeg3_MMX_grnmask,      %%mm0\n"
			"por            %%mm7,                  %%mm3\n"
			"addl           $8,                     %6\n"     /* x += 8 */
			"por            %%mm0,                  %%mm3\n"

			"movq           %%mm4,                  %%mm5\n"

			"punpcklwd      %%mm3,                  %%mm4\n"
			"punpckhwd      %%mm3,                  %%mm5\n"

			"movq           %%mm4,                  (%4,%5,2)\n"  /* 8 pixels of the lower row */
			"movq           %%mm5,                  8(%4,%5,2)\n"

			"addl           $8,                     %2\n"     /* lum += 8 */
			"addl           $4,                     %0\n"     /* cr += 4 */
			"addl           $4,                     %1\n"     /* cb += 4 */
			"cmpl           %3,                     %6\n"     /* x < cols ? */
			"leal           16(%4),                 %4\n"     /* row1 += 8 pixels; lea leaves the flags alone */
		"jl             1b\n"
		"addl           %3,     %2\n"                   /* lum += cols (skip the lower row already done) */
		"addl           %7,     %4\n"                   /* row1 += mod */
		"movl           $0,     %6\n"                   /* x = 0 */
		"cmpl           %8,     %2\n"                   /* lum < y ? */
		"jl             1b\n"
		: "+r" (cr),
		  "+r" (cb),
		  "+r" (lum),
		  "+r" (cols),
		  "+r" (row1),
		  "+r" (col1),
		  "+m" (x)
		: "m" (mod),
		  "m" (y)
		: "memory", "cc",
		  "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
		);
}
254 
/*
 * Quad-word operands for mpeg3_bgra32_mmx() and mpeg3_601_bgra32_mmx().
 * Each value is four 16-bit lanes; lane 0 is the least significant word.
 * The assembly references these objects by symbol name only, so they are
 * marked `used` to stop the compiler from discarding them as unreferenced.
 */
static __attribute__((used)) unsigned long long  mpeg3_MMX_U_80 = 0x0000008000800000LL;   /* 128 in lanes 1 and 2 */
static __attribute__((used)) unsigned long long  mpeg3_MMX_V_80 = 0x0000000000800080LL;   /* 128 in lanes 0 and 1 */
static __attribute__((used)) long long  mpeg3_MMX_U_COEF        = 0x00000058ffd30000LL;   /* lane 1 = -45, lane 2 = 88 */
static __attribute__((used)) long long  mpeg3_MMX_V_COEF        = 0x00000000ffea006fLL;   /* lane 0 = 111, lane 1 = -22 */
static __attribute__((used)) long long  mpeg3_MMX_601_Y_COEF    = 0x0000004800480048LL;   /* 72 in lanes 0..2 */
static __attribute__((used)) long long  mpeg3_MMX_601_Y_DIFF    = 0x0000000000000010LL;   /* 16 in lane 0 */
261 
/*
 * Convert one pixel to a packed 32-bit value with MMX and store it.
 *
 * y, u, v   sample values; only their low 32 bits are loaded.  `u` is the
 *           sample multiplied into lanes 1 and 2 (mpeg3_MMX_U_COEF) and `v`
 *           the one multiplied into lanes 0 and 1 (mpeg3_MMX_V_COEF).
 * output    receives 4 bytes: the saturated lanes 0, 1, 2 followed by a
 *           zero byte (0x00rrggbb as a little-endian word).  Exactly four
 *           bytes are written, whatever sizeof(unsigned long) is.
 *
 * The assembly reads y/u/v and writes *output through pointer operands, so
 * it carries a "memory" clobber; the MMX registers it overwrites are listed
 * as well.  No `emms` is executed here.
 */
inline void mpeg3_bgra32_mmx(unsigned long y,
		unsigned long u,
		unsigned long v,
		unsigned long *output)
{
	__asm__ __volatile__(
		"movd (%0), %%mm0\n\t"                    /* y                0x00000000000000yy */
		"movd (%1), %%mm1\n\t"                    /* u                0x00000000000000uu */
		"movq %%mm0, %%mm3\n\t"                   /* copy y to temp */
		"psllq $16, %%mm1\n\t"                    /* shift u          0x0000000000uu0000 */
		"movd (%2), %%mm2\n\t"                    /* v                0x00000000000000vv */
		"psllq $16, %%mm3\n\t"                    /* shift y */
		"movq %%mm1, %%mm4\n\t"                   /* copy u to temp */
		"por %%mm3, %%mm0\n\t"                    /* overlay y        0x0000000000yy00yy */
		"psllq $16, %%mm4\n\t"                    /* shift u */
		"movq %%mm2, %%mm5\n\t"                   /* copy v to temp */
		"psllq $16, %%mm3\n\t"                    /* shift y */
		"por %%mm4, %%mm1\n\t"                    /* overlay u        0x000000uu00uu0000 */
		"psllq $16, %%mm5\n\t"                    /* shift v */
		"por %%mm3, %%mm0\n\t"                    /* overlay y        0x000000yy00yy00yy */
		"por %%mm5, %%mm2\n\t"                    /* overlay v        0x0000000000vv00vv */

		"psubw _mpeg3_MMX_U_80, %%mm1\n\t"        /* u - 128 in lanes 1 and 2 */
		"pmullw _mpeg3_MMX_U_COEF, %%mm1\n\t"     /* scale u */
		"psllw $6, %%mm0\n\t"                     /* y * 64 */
		"psubw _mpeg3_MMX_V_80, %%mm2\n\t"        /* v - 128 in lanes 0 and 1 */
		"pmullw _mpeg3_MMX_V_COEF, %%mm2\n\t"     /* scale v */

		"paddsw %%mm1, %%mm0\n\t"                 /* add u contribution */
		"paddsw %%mm2, %%mm0\n\t"                 /* add v contribution */
		"psraw $6, %%mm0\n\t"                     /* drop the 6 fraction bits */
		"packuswb %%mm0, %%mm0\n\t"               /* saturate words to bytes */
		"movd %%mm0, (%3)\n\t"                    /* store 4 bytes */
		:
		: "r" (&y), "r" (&u), "r" (&v), "r" (output)
		: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5");
}
304 
/*
 * Same conversion as mpeg3_bgra32_mmx(), except that the luma sample first
 * has mpeg3_MMX_601_Y_DIFF (16) subtracted and is then multiplied by
 * mpeg3_MMX_601_Y_COEF (72) instead of being shifted left by 6.
 *
 * y, u, v   sample values; only their low 32 bits are loaded.
 * output    receives 4 bytes: saturated lanes 0, 1, 2 and a zero byte.
 *
 * The assembly accesses memory through pointer operands, hence the "memory"
 * clobber; the MMX registers it overwrites are listed too.  No `emms` is
 * executed here.
 */
inline void mpeg3_601_bgra32_mmx(unsigned long y,
		unsigned long u,
		unsigned long v,
		unsigned long *output)
{
	__asm__ __volatile__(
		"movd (%0), %%mm0\n\t"                    /* y                0x00000000000000yy */
		"psubsw _mpeg3_MMX_601_Y_DIFF, %%mm0\n\t" /* y - 16 (signed saturating) */
		"movd (%1), %%mm1\n\t"                    /* u                0x00000000000000uu */
		"movq %%mm0, %%mm3\n\t"                   /* copy y to temp */
		"psllq $16, %%mm1\n\t"                    /* shift u          0x0000000000uu0000 */
		"movd (%2), %%mm2\n\t"                    /* v                0x00000000000000vv */
		"psllq $16, %%mm3\n\t"                    /* shift y */
		"movq %%mm1, %%mm4\n\t"                   /* copy u to temp */
		"por %%mm3, %%mm0\n\t"                    /* overlay y        0x0000000000yy00yy */
		"psllq $16, %%mm4\n\t"                    /* shift u */
		"movq %%mm2, %%mm5\n\t"                   /* copy v to temp */
		"psllq $16, %%mm3\n\t"                    /* shift y */
		"por %%mm4, %%mm1\n\t"                    /* overlay u        0x000000uu00uu0000 */
		"psllq $16, %%mm5\n\t"                    /* shift v */
		"por %%mm3, %%mm0\n\t"                    /* overlay y        0x000000yy00yy00yy */
		"por %%mm5, %%mm2\n\t"                    /* overlay v        0x0000000000vv00vv */

		"pmullw _mpeg3_MMX_601_Y_COEF, %%mm0\n\t" /* scale y */
		"psubw _mpeg3_MMX_U_80, %%mm1\n\t"        /* u - 128 in lanes 1 and 2 */
		"pmullw _mpeg3_MMX_U_COEF, %%mm1\n\t"     /* scale u */
		"psubw _mpeg3_MMX_V_80, %%mm2\n\t"        /* v - 128 in lanes 0 and 1 */
		"pmullw _mpeg3_MMX_V_COEF, %%mm2\n\t"     /* scale v */

		"paddsw %%mm1, %%mm0\n\t"                 /* add u contribution */
		"paddsw %%mm2, %%mm0\n\t"                 /* add v contribution */
		"psraw $6, %%mm0\n\t"                     /* drop the 6 fraction bits */
		"packuswb %%mm0, %%mm0\n\t"               /* saturate words to bytes */
		"movd %%mm0, (%3)\n\t"                    /* store 4 bytes */
		:
		: "r" (&y), "r" (&u), "r" (&v), "r" (output)
		: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5");
}
347 
/*
 * Quad-word operands for mpeg3_rgba32_mmx() and mpeg3_601_rgba32_mmx():
 * the same bias and coefficients as the BGRA set with the outer lanes
 * exchanged.  Lane 0 is the least significant 16-bit word.  The assembly
 * references these objects by symbol name only, so they are marked `used`
 * to stop the compiler from discarding them as unreferenced.
 */
static __attribute__((used)) unsigned long long  mpeg3_MMX_U_80_RGB    = 0x0000000000800080LL;   /* 128 in lanes 0 and 1 */
static __attribute__((used)) unsigned long long  mpeg3_MMX_V_80_RGB    = 0x0000008000800000LL;   /* 128 in lanes 1 and 2 */
static __attribute__((used)) long long  mpeg3_MMX_U_COEF_RGB    = 0x00000000ffd30058LL;          /* lane 0 = 88, lane 1 = -45 */
static __attribute__((used)) long long  mpeg3_MMX_V_COEF_RGB    = 0x0000006fffea0000LL;          /* lane 1 = -22, lane 2 = 111 */
352 
/*
 * Convert one pixel to a packed 32-bit value with MMX and store it, with
 * the outer colour lanes exchanged relative to mpeg3_bgra32_mmx().
 *
 * y, u, v   sample values; only their low 32 bits are loaded.  Note the
 *           operand order below: %1 is &v and %2 is &u.  `v` is multiplied
 *           into lanes 1 and 2 (mpeg3_MMX_V_COEF_RGB) and `u` into lanes 0
 *           and 1 (mpeg3_MMX_U_COEF_RGB).
 * output    receives 4 bytes: the saturated lanes 0, 1, 2 followed by a
 *           zero byte (0x00bbggrr as a little-endian word).
 *
 * The assembly accesses memory through pointer operands, hence the "memory"
 * clobber; the MMX registers it overwrites are listed too.  No `emms` is
 * executed here.
 */
inline void mpeg3_rgba32_mmx(unsigned long y,
		unsigned long u,
		unsigned long v,
		unsigned long *output)
{
	__asm__ __volatile__(
		"movd (%0), %%mm0\n\t"                      /* y                0x00000000000000yy */
		"movd (%1), %%mm1\n\t"                      /* v                0x00000000000000vv */
		"movq %%mm0, %%mm3\n\t"                     /* copy y to temp */
		"psllq $16, %%mm1\n\t"                      /* shift v          0x0000000000vv0000 */
		"movd (%2), %%mm2\n\t"                      /* u                0x00000000000000uu */
		"psllq $16, %%mm3\n\t"                      /* shift y */
		"movq %%mm1, %%mm4\n\t"                     /* copy v to temp */
		"por %%mm3, %%mm0\n\t"                      /* overlay y        0x0000000000yy00yy */
		"psllq $16, %%mm4\n\t"                      /* shift v */
		"movq %%mm2, %%mm5\n\t"                     /* copy u to temp */
		"psllq $16, %%mm3\n\t"                      /* shift y */
		"por %%mm4, %%mm1\n\t"                      /* overlay v        0x000000vv00vv0000 */
		"psllq $16, %%mm5\n\t"                      /* shift u */
		"por %%mm3, %%mm0\n\t"                      /* overlay y        0x000000yy00yy00yy */
		"por %%mm5, %%mm2\n\t"                      /* overlay u        0x0000000000uu00uu */

		"psubw _mpeg3_MMX_V_80_RGB, %%mm1\n\t"      /* v - 128 in lanes 1 and 2 */
		"pmullw _mpeg3_MMX_V_COEF_RGB, %%mm1\n\t"   /* scale v */
		"psllw $6, %%mm0\n\t"                       /* y * 64 */
		"psubw _mpeg3_MMX_U_80_RGB, %%mm2\n\t"      /* u - 128 in lanes 0 and 1 */
		"pmullw _mpeg3_MMX_U_COEF_RGB, %%mm2\n\t"   /* scale u */

		"paddsw %%mm1, %%mm0\n\t"                   /* add v contribution */
		"paddsw %%mm2, %%mm0\n\t"                   /* add u contribution */
		"psraw $6, %%mm0\n\t"                       /* drop the 6 fraction bits */
		"packuswb %%mm0, %%mm0\n\t"                 /* saturate words to bytes */
		"movd %%mm0, (%3)\n\t"                      /* store 4 bytes */
		:
		: "r" (&y), "r" (&v), "r" (&u), "r" (output)
		: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5");
}
394 
/*
 * Same conversion as mpeg3_rgba32_mmx(), except that the luma sample first
 * has mpeg3_MMX_601_Y_DIFF (16) subtracted and is then multiplied by
 * mpeg3_MMX_601_Y_COEF (72) instead of being shifted left by 6.
 *
 * y, u, v   sample values; only their low 32 bits are loaded.  Operand %1
 *           is &v and %2 is &u.
 * output    receives 4 bytes: saturated lanes 0, 1, 2 and a zero byte.
 *
 * The assembly accesses memory through pointer operands, hence the "memory"
 * clobber; the MMX registers it overwrites are listed too.  No `emms` is
 * executed here.
 */
inline void mpeg3_601_rgba32_mmx(unsigned long y,
		unsigned long u,
		unsigned long v,
		unsigned long *output)
{
	__asm__ __volatile__(
		"movd (%0), %%mm0\n\t"                      /* y                0x00000000000000yy */
		"psubsw _mpeg3_MMX_601_Y_DIFF, %%mm0\n\t"   /* y - 16 (signed saturating) */
		"movd (%1), %%mm1\n\t"                      /* v                0x00000000000000vv */
		"movq %%mm0, %%mm3\n\t"                     /* copy y to temp */
		"psllq $16, %%mm1\n\t"                      /* shift v          0x0000000000vv0000 */
		"movd (%2), %%mm2\n\t"                      /* u                0x00000000000000uu */
		"psllq $16, %%mm3\n\t"                      /* shift y */
		"movq %%mm1, %%mm4\n\t"                     /* copy v to temp */
		"por %%mm3, %%mm0\n\t"                      /* overlay y        0x0000000000yy00yy */
		"psllq $16, %%mm4\n\t"                      /* shift v */
		"movq %%mm2, %%mm5\n\t"                     /* copy u to temp */
		"psllq $16, %%mm3\n\t"                      /* shift y */
		"por %%mm4, %%mm1\n\t"                      /* overlay v        0x000000vv00vv0000 */
		"psllq $16, %%mm5\n\t"                      /* shift u */
		"por %%mm3, %%mm0\n\t"                      /* overlay y        0x000000yy00yy00yy */
		"por %%mm5, %%mm2\n\t"                      /* overlay u        0x0000000000uu00uu */

		"pmullw _mpeg3_MMX_601_Y_COEF, %%mm0\n\t"   /* scale y */
		"psubw _mpeg3_MMX_V_80_RGB, %%mm1\n\t"      /* v - 128 in lanes 1 and 2 */
		"pmullw _mpeg3_MMX_V_COEF_RGB, %%mm1\n\t"   /* scale v */
		"psubw _mpeg3_MMX_U_80_RGB, %%mm2\n\t"      /* u - 128 in lanes 0 and 1 */
		"pmullw _mpeg3_MMX_U_COEF_RGB, %%mm2\n\t"   /* scale u */

		"paddsw %%mm1, %%mm0\n\t"                   /* add v contribution */
		"paddsw %%mm2, %%mm0\n\t"                   /* add u contribution */
		"psraw $6, %%mm0\n\t"                       /* drop the 6 fraction bits */
		"packuswb %%mm0, %%mm0\n\t"                 /* saturate words to bytes */
		"movd %%mm0, (%3)\n\t"                      /* store 4 bytes */
		:
		: "r" (&y), "r" (&v), "r" (&u), "r" (output)
		: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5");
}
437 
438 #endif
439 
/*
 * Opens the per-output-row loop (closed by DITHER_ROW_TAIL).  For output row
 * h it points y_in / cb_in / cr_in at the first source sample of the mapped
 * input row (video->y_table[h] + video->in_y) and points data at
 * output_rows[h].
 *
 * Chroma rows are indexed with the luma row halved, and both chroma pointers
 * are offset by in_x >> 1.  The Cb offset previously used in_x >> 2, which
 * made cb_in and cr_in address different columns whenever in_x >= 2.
 *
 * Expects h, y_in, cb_in, cr_in, data, video, src and output_rows to be in
 * scope at the expansion site.
 */
#define DITHER_ROW_HEAD \
	for(h = 0; h < video->out_h; h++) \
	{ \
		y_in = &src[0][(video->y_table[h] + video->in_y) * video->coded_picture_width] + video->in_x; \
		cb_in = &src[1][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
		cr_in = &src[2][((video->y_table[h] + video->in_y) >> 1) * video->chrom_width] + (video->in_x >> 1); \
		data = output_rows[h];

/* Closes the loop opened by DITHER_ROW_HEAD. */
#define DITHER_ROW_TAIL \
	}
450 
/*
 * Common opening of the scaled per-pixel loop.  For every output column w it
 * samples source column video->x_table[w] (chroma at half that index),
 * stores `luma` shifted left by 16 in y_l, adds the per-sample table values
 * and shifts the sums back right by 16 to produce r_l, g_l and b_l.
 * `luma` is evaluated exactly once per iteration.
 */
#define DITHER_SCALE_LOOP_OPEN(luma) \
	for(w = 0; w < video->out_w; w++) \
	{ \
		uv_subscript = video->x_table[w] / 2; \
		y_l = (luma); \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[cr_in[uv_subscript]]) >> 16; \
		g_l = (y_l + video->cr_to_g[cr_in[uv_subscript]] + video->cb_to_g[cb_in[uv_subscript]]) >> 16; \
		b_l = (y_l + video->cb_to_b[cb_in[uv_subscript]]) >> 16;

/* Scaled loop using the raw luma sample. */
#define DITHER_SCALE_HEAD \
	DITHER_SCALE_LOOP_OPEN(y_in[video->x_table[w]])

/* Scaled loop with the luma sample remapped through mpeg3_601_to_rgb[]. */
#define DITHER_SCALE_601_HEAD \
	DITHER_SCALE_LOOP_OPEN(mpeg3_601_to_rgb[y_in[video->x_table[w]]])

/* Closes either scaled loop. */
#define DITHER_SCALE_TAIL \
	}
473 
/*
 * Scaled MMX loop: one iteration per output column.  Only the chroma index
 * for the mapped source column is prepared; the body supplies the pixel
 * conversion.
 */
#define DITHER_MMX_SCALE_HEAD \
	for(w = 0; video->out_w > w; ++w) \
	{ \
		uv_subscript = video->x_table[w] / 2;

/* Advances the output pointer by one pixel (step bytes) and closes the loop. */
#define DITHER_MMX_SCALE_TAIL \
		data = data + step; \
	}

/* Unscaled MMX loop: one iteration per pair of output columns. */
#define DITHER_MMX_HEAD \
	for(w = 0; video->out_w > w; w = w + 2) \
	{

/*
 * Advances the output pointer by step bytes and both chroma pointers by one
 * sample, then closes the loop.
 */
#define DITHER_MMX_TAIL \
		data = data + step; \
		++cr_in; \
		++cb_in; \
	}
492 
/*
 * Common opening of the unscaled per-pixel loop over
 * video->horizontal_size columns.  `luma` must consume one luma sample (it
 * is evaluated exactly once per iteration); the value is shifted left by 16
 * into y_l, combined with the table entries for the current chroma samples
 * and shifted back right by 16 into r_l, g_l and b_l.
 */
#define DITHER_LOOP_OPEN(luma) \
    for(w = 0; w < video->horizontal_size; w++) \
	{ \
		y_l = (luma); \
		y_l <<= 16; \
		r_l = (y_l + video->cr_to_r[*cr_in]) >> 16; \
		g_l = (y_l + video->cr_to_g[*cr_in] + video->cb_to_g[*cb_in]) >> 16; \
		b_l = (y_l + video->cb_to_b[*cb_in]) >> 16;

/* Unscaled loop using the raw luma sample; y_in advances by one. */
#define DITHER_HEAD \
	DITHER_LOOP_OPEN(*y_in++)

/* Unscaled loop with the luma sample remapped through mpeg3_601_to_rgb[]. */
#define DITHER_601_HEAD \
	DITHER_LOOP_OPEN(mpeg3_601_to_rgb[*y_in++])

/*
 * Steps both chroma pointers after every odd column (w & 1 is 1 there and 0
 * on even columns), then closes the loop.
 */
#define DITHER_TAIL \
		cr_in += (w & 1); \
		cb_in += (w & 1); \
    }
518 
519 
/*
 * Pixel writers.  Each one looks r_l / g_l / b_l up through clipArray_ptr,
 * writes one pixel at data (or data_s for the 16-bit-per-channel form) and
 * advances that pointer past the pixel.  They expand to several statements
 * and are meant to sit inside the braces of a DITHER_* loop body.
 */

/* 3 bytes: B, G, R. */
#define STORE_PIXEL_BGR888 \
	data[0] = clipArray_ptr[b_l]; \
	data[1] = clipArray_ptr[g_l]; \
	data[2] = clipArray_ptr[r_l]; \
	data += 3;

/* 4 bytes: B, G, R, 0xFF. */
#define STORE_PIXEL_BGRA8888 \
	data[0] = clipArray_ptr[b_l]; \
	data[1] = clipArray_ptr[g_l]; \
	data[2] = clipArray_ptr[r_l]; \
	data[3] = 0xFF; \
	data += 4;

/* One 16-bit word: 5 bits red, 6 bits green, 5 bits blue (red on top). */
#define STORE_PIXEL_RGB565 \
    foo = ((clipArray_ptr[b_l] & 0xf8) >> 3) | \
		((clipArray_ptr[g_l] & 0xfc) << 3) | \
		((clipArray_ptr[r_l] & 0xf8) << 8); \
    *(unsigned short*)data = foo; \
    data += 2;

/* One 16-bit word: 5 bits each of red, green, blue; top bit clear. */
#define STORE_PIXEL_RGB555 \
    foo = ((clipArray_ptr[b_l] & 0xf8) >> 3) | \
		((clipArray_ptr[g_l] & 0xf8) << 2) | \
		((clipArray_ptr[r_l] & 0xf8) << 7); \
    *(unsigned short*)data = foo; \
    data += 2;

/*
 * Same 5-5-5 word as STORE_PIXEL_RGB555, but written to the other 16-bit
 * half of the 4-byte group data currently points into: two bytes ahead when
 * bit 1 of the address is clear, two bytes back when it is set.  Either way
 * data ends up advanced by 2.
 */
#define STORE_PIXEL_RGBI555 \
    foo = ((clipArray_ptr[b_l] & 0xf8) >> 3) | \
          ((clipArray_ptr[g_l] & 0xf8) << 2) | \
          ((clipArray_ptr[r_l] & 0xf8) << 7); \
    *(unsigned short*)((((unsigned long)data & 0x00000002L) != 0) ? (data - 2) : (data + 2)) = foo; \
    data += 2;


/* 3 bytes: R, G, B. */
#define STORE_PIXEL_RGB888 \
	data[0] = clipArray_ptr[r_l]; \
	data[1] = clipArray_ptr[g_l]; \
	data[2] = clipArray_ptr[b_l]; \
	data += 3;

/* 4 bytes: R, G, B, 0xFF. */
#define STORE_PIXEL_RGBA8888 \
	data[0] = clipArray_ptr[r_l]; \
	data[1] = clipArray_ptr[g_l]; \
	data[2] = clipArray_ptr[b_l]; \
	data[3] = 0xFF; \
	data += 4;

/* 4 bytes: 0xFF, R, G, B. */
#define STORE_PIXEL_ARGB8888 \
	data[0] = 0xFF; \
	data[1] = clipArray_ptr[r_l]; \
	data[2] = clipArray_ptr[g_l]; \
	data[3] = clipArray_ptr[b_l]; \
	data += 4;

/* 4 elements through data_s: R, G, B lookups unchanged, then 0xFFFF. */
#define STORE_PIXEL_RGBA16161616 \
	data_s[0] = clipArray_ptr[r_l]; \
	data_s[1] = clipArray_ptr[g_l]; \
	data_s[2] = clipArray_ptr[b_l]; \
	data_s[3] = 0xFFFF; \
	data_s += 4;
577 
578 
579 
580 /* Only good for YUV 4:2:0 */
mpeg3video_ditherframe(mpeg3video_t * video,unsigned char ** src,unsigned char ** output_rows)581 int mpeg3video_ditherframe(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows)
582 {
583 	int h = 0;
584 	register unsigned char *y_in, *cb_in, *cr_in;
585 	long y_l, r_l, b_l, g_l;
586 	register unsigned char *data,*clipArray_ptr=gClipArray_ptr;
587 	register int uv_subscript, step, w = -1;
588 	register short foo;
589 
590 #ifdef HAVE_MMX
591 /* =================================== MMX ===================================== */
592 	if(video->have_mmx &&
593 		video->out_w == video->horizontal_size &&
594 		video->out_h == video->vertical_size &&
595 		video->in_w == video->out_w &&
596 		video->in_h == video->out_h &&
597 		video->in_x == 0 &&
598 		video->in_y == 0 &&
599 		(video->color_model == MPEG3_RGB565 || video->color_model == MPEG3_601_RGB565 ||
600 		video->color_model == MPEG3_RGB555 || video->color_model == MPEG3_601_RGB555 ||
601 		video->color_model == MPEG3_RGBI555 || video->color_model == MPEG3_601_RGBI555	))
602 	{
603 /* Unscaled 16 bit */
604 		mpeg3video_rgb16_mmx(src[0],
605 			src[2],
606 			src[1],
607 			output_rows[0],
608 			video->out_h,
609 			video->out_w,
610 			(output_rows[1] - output_rows[0]) / 2 - video->out_w);
611 	}
612 	else
613 	if(video->have_mmx &&
614 		(video->color_model == MPEG3_BGRA8888 ||
615 		video->color_model == MPEG3_BGR888 ||
616 /*		video->color_model == MPEG3_RGB888 || */
617 		video->color_model == MPEG3_RGBA8888 ||
618 		video->color_model == MPEG3_ARGB8888 ||
619 		video->color_model == MPEG3_601_BGR888 ||
620 		video->color_model == MPEG3_601_BGRA8888 ||
621 		video->color_model == MPEG3_601_RGB888 ||
622 		video->color_model == MPEG3_601_RGBA8888||
623 		video->color_model == MPEG3_601_ARGB8888))
624 	{
625 /* Original MMX */
626 		if(video->color_model == MPEG3_BGRA8888 ||
627 			video->color_model == MPEG3_RGBA8888 ||
628 			video->color_model == MPEG3_ARGB8888 ||
629 			video->color_model == MPEG3_601_BGRA8888 ||
630 			video->color_model == MPEG3_601_RGBA8888 ||
631 			video->color_model == MPEG3_601_ARGB8888) step = 4;
632 		else
633 		if(video->color_model == MPEG3_BGR888 ||
634 			video->color_model == MPEG3_RGB888 ||
635 			video->color_model == MPEG3_601_BGR888 ||
636 			video->color_model == MPEG3_601_RGB888) step = 3;
637 
638 		DITHER_ROW_HEAD
639 /* Transfer row with scaling */
640 			if(video->out_w != video->horizontal_size)
641 			{
642 				switch(video->color_model)
643 				{
644 					case MPEG3_BGRA8888:
645 					case MPEG3_BGR888:
646 						DITHER_MMX_SCALE_HEAD
647 							mpeg3_bgra32_mmx(y_in[video->x_table[w]],
648 								cr_in[uv_subscript],
649 								cb_in[uv_subscript],
650 								(unsigned long*)data);
651 						DITHER_MMX_SCALE_TAIL
652 						break;
653 
654 					case MPEG3_601_BGRA8888:
655 					case MPEG3_601_BGR888:
656 						DITHER_MMX_SCALE_HEAD
657 							mpeg3_601_bgra32_mmx(y_in[video->x_table[w]],
658 								cr_in[uv_subscript],
659 								cb_in[uv_subscript],
660 								(unsigned long*)data);
661 						DITHER_MMX_SCALE_TAIL
662 						break;
663 
664 					case MPEG3_RGBA8888:
665 					case MPEG3_ARGB8888:
666 					case MPEG3_RGB888:
667 						DITHER_MMX_SCALE_HEAD
668 							mpeg3_rgba32_mmx(y_in[video->x_table[w]],
669 								cr_in[uv_subscript],
670 								cb_in[uv_subscript],
671 								(unsigned long*)data);
672 						DITHER_MMX_SCALE_TAIL
673 						break;
674 
675 					case MPEG3_601_RGBA8888:
676 					case MPEG3_601_ARGB8888:
677 					case MPEG3_601_RGB888:
678 						DITHER_MMX_SCALE_HEAD
679 							mpeg3_601_rgba32_mmx(y_in[video->x_table[w]],
680 								cr_in[uv_subscript],
681 								cb_in[uv_subscript],
682 								(unsigned long*)data);
683 						DITHER_MMX_SCALE_TAIL
684 						break;
685 				}
686 			}
687 			else
688 /* Transfer row unscaled */
689 			{
690 				switch(video->color_model)
691 				{
692 /* MMX byte swap 24 and 32 bit */
693 					case MPEG3_BGRA8888:
694 					case MPEG3_BGR888:
695 						DITHER_MMX_HEAD
696 							mpeg3_bgra32_mmx(*y_in++,
697 								*cr_in,
698 								*cb_in,
699 								(unsigned long*)data);
700 							data += step;
701 							mpeg3_bgra32_mmx(*y_in++,
702 								*cr_in,
703 								*cb_in,
704 								(unsigned long*)data);
705 						DITHER_MMX_TAIL
706 						break;
707 
708 /* MMX 601 byte swap 24 and 32 bit */
709 					case MPEG3_601_BGRA8888:
710 					case MPEG3_601_BGR888:
711 						DITHER_MMX_HEAD
712 							mpeg3_601_bgra32_mmx(*y_in++,
713 								*cr_in,
714 								*cb_in,
715 								(unsigned long*)data);
716 							data += step;
717 							mpeg3_601_bgra32_mmx(*y_in++,
718 								*cr_in,
719 								*cb_in,
720 								(unsigned long*)data);
721 						DITHER_MMX_TAIL
722 						break;
723 
724 /* MMX 24 and 32 bit no byte swap */
725 					case MPEG3_RGBA8888:
726 					case MPEG3_ARGB8888:
727 					case MPEG3_RGB888:
728 						DITHER_MMX_HEAD
729 							mpeg3_rgba32_mmx(*y_in++,
730 								*cr_in,
731 								*cb_in,
732 								(unsigned long*)data);
733 							data += step;
734 							mpeg3_rgba32_mmx(*y_in++,
735 								*cr_in,
736 								*cb_in,
737 								(unsigned long*)data);
738 						DITHER_MMX_TAIL
739 						break;
740 
741 /* MMX 601 24 and 32 bit no byte swap */
742 					case MPEG3_601_RGBA8888:
743 					case MPEG3_601_ARGB8888:
744 					case MPEG3_601_RGB888:
745 						DITHER_MMX_HEAD
746 							mpeg3_601_rgba32_mmx(*y_in++,
747 								*cr_in,
748 								*cb_in,
749 								(unsigned long*)data);
750 							data += step;
751 							mpeg3_601_rgba32_mmx(*y_in++,
752 								*cr_in,
753 								*cb_in,
754 								(unsigned long*)data);
755 						DITHER_MMX_TAIL
756 						break;
757 				}
758 			}
759 		DITHER_ROW_TAIL
760 	}
761 	else
762 #endif
763 /* ================================== NO MMX ==================================== */
764 	{
765 		DITHER_ROW_HEAD
766 /* Transfer row with scaling */
767 			if(video->out_w != video->horizontal_size)
768 			{
769 				switch(video->color_model)
770 				{
771 					case MPEG3_BGR888:
772 						DITHER_SCALE_HEAD
773 						STORE_PIXEL_BGR888
774 						DITHER_SCALE_TAIL
775 						break;
776 					case MPEG3_BGRA8888:
777 						DITHER_SCALE_HEAD
778 						STORE_PIXEL_BGRA8888
779 						DITHER_SCALE_TAIL
780 						break;
781 					case MPEG3_RGB565:
782 						DITHER_SCALE_HEAD
783 						STORE_PIXEL_RGB565
784 						DITHER_SCALE_TAIL
785 						break;
786 					case MPEG3_RGB555:
787 						DITHER_SCALE_HEAD
788 						STORE_PIXEL_RGB555
789 						DITHER_SCALE_TAIL
790 						break;
791 					case MPEG3_RGBI555:
792 						DITHER_SCALE_HEAD
793 						STORE_PIXEL_RGBI555
794 						DITHER_SCALE_TAIL
795 						break;
796 					case MPEG3_RGB888:
797 						DITHER_SCALE_HEAD
798 						STORE_PIXEL_RGB888
799 						DITHER_SCALE_TAIL
800 						break;
801 					case MPEG3_RGBA8888:
802 						DITHER_SCALE_HEAD
803 						STORE_PIXEL_RGBA8888
804 						DITHER_SCALE_TAIL
805 						break;
806 					case MPEG3_ARGB8888:
807 						DITHER_SCALE_HEAD
808 						STORE_PIXEL_ARGB8888
809 						DITHER_SCALE_TAIL
810 						break;
811 					case MPEG3_601_BGR888:
812 						DITHER_SCALE_601_HEAD
813 						STORE_PIXEL_BGR888
814 						DITHER_SCALE_TAIL
815 						break;
816 					case MPEG3_601_BGRA8888:
817 						DITHER_SCALE_601_HEAD
818 						STORE_PIXEL_BGRA8888
819 						DITHER_SCALE_TAIL
820 						break;
821 					case MPEG3_601_RGB565:
822 						DITHER_SCALE_601_HEAD
823 						STORE_PIXEL_RGB565
824 						DITHER_SCALE_TAIL
825 						break;
826 					case MPEG3_601_RGB555:
827 						DITHER_SCALE_601_HEAD
828 						STORE_PIXEL_RGB555
829 						DITHER_SCALE_TAIL
830 						break;
831 					case MPEG3_601_RGBI555:
832 						DITHER_SCALE_601_HEAD
833 						STORE_PIXEL_RGBI555
834 						DITHER_SCALE_TAIL
835 						break;
836 					case MPEG3_601_RGB888:
837 						DITHER_SCALE_601_HEAD
838 						STORE_PIXEL_RGB888
839 						DITHER_SCALE_TAIL
840 						break;
841 					case MPEG3_601_RGBA8888:
842 						DITHER_SCALE_601_HEAD
843 						STORE_PIXEL_RGBA8888
844 						DITHER_SCALE_TAIL
845 						break;
846 					case MPEG3_601_ARGB8888:
847 						DITHER_SCALE_601_HEAD
848 						STORE_PIXEL_ARGB8888
849 						DITHER_SCALE_TAIL
850 						break;
851 					case MPEG3_RGBA16161616:
852 					{
853 						register unsigned short *data_s = (unsigned short*)data;
854 						DITHER_SCALE_HEAD
855 						STORE_PIXEL_RGBA16161616
856 						DITHER_SCALE_TAIL
857 					}
858 						break;
859 				}
860 			}
861 			else
862 			{
863 /* Transfer row unscaled */
864 				switch(video->color_model)
865 				{
866 					case MPEG3_BGR888:
867 						DITHER_HEAD
868 						STORE_PIXEL_BGR888
869 						DITHER_TAIL
870 						break;
871 					case MPEG3_BGRA8888:
872 						DITHER_HEAD
873 						STORE_PIXEL_BGRA8888
874 						DITHER_TAIL
875 						break;
876 					case MPEG3_RGB565:
877 						DITHER_HEAD
878 						STORE_PIXEL_RGB565
879 						DITHER_TAIL
880 						break;
881 					case MPEG3_RGB555:
882 						DITHER_HEAD
883 						STORE_PIXEL_RGB555
884 						DITHER_TAIL
885 						break;
886 					case MPEG3_RGBI555:
887 						DITHER_HEAD
888 						STORE_PIXEL_RGBI555
889 						DITHER_TAIL
890 						break;
891 					case MPEG3_RGB888:
892 						DITHER_HEAD
893 						STORE_PIXEL_RGB888
894 						DITHER_TAIL
895 						break;
896 					case MPEG3_RGBA8888:
897 						DITHER_HEAD
898 						STORE_PIXEL_RGBA8888
899 						DITHER_TAIL
900 						break;
901 					case MPEG3_ARGB8888:
902 						DITHER_HEAD
903 						STORE_PIXEL_ARGB8888
904 						DITHER_TAIL
905 						break;
906 					case MPEG3_601_BGR888:
907 						DITHER_601_HEAD
908 						STORE_PIXEL_BGR888
909 						DITHER_TAIL
910 						break;
911 					case MPEG3_601_BGRA8888:
912 						DITHER_601_HEAD
913 						STORE_PIXEL_RGB565 /*? JMM */
914 						DITHER_TAIL
915 						break;
916 					case MPEG3_601_RGB565:
917 						DITHER_601_HEAD
918 						STORE_PIXEL_RGB565
919 						DITHER_TAIL
920 						break;
921 					case MPEG3_601_RGB555:
922 						DITHER_601_HEAD
923 						STORE_PIXEL_RGB555
924 						DITHER_TAIL
925 						break;
926 					case MPEG3_601_RGBI555:
927 						DITHER_601_HEAD
928 						STORE_PIXEL_RGBI555
929 						DITHER_TAIL
930 						break;
931 					case MPEG3_601_RGB888:
932 						DITHER_601_HEAD
933 						STORE_PIXEL_RGB888
934 						DITHER_TAIL
935 						break;
936 					case MPEG3_601_RGBA8888:
937 						DITHER_601_HEAD
938 						STORE_PIXEL_RGBA8888
939 						DITHER_TAIL
940 						break;
941 					case MPEG3_601_ARGB8888:
942 						DITHER_601_HEAD
943 						STORE_PIXEL_ARGB8888
944 						DITHER_TAIL
945 						break;
946 					case MPEG3_RGBA16161616:
947 					{
948 						register unsigned short *data_s = (unsigned short*)data;
949 						DITHER_HEAD
950 						STORE_PIXEL_RGBA16161616
951 						DITHER_TAIL
952 					}
953 						break;
954 				}
955 			}
956 		DITHER_ROW_TAIL
957 	} /* End of non-MMX */
958 
959 #ifdef HAVE_MMX
960 	if(video->have_mmx)
961 		__asm__ __volatile__ ("emms");
962 #endif
963 	return 0;
964 }
965 
/*
 * Frame conversion entry point used by mpeg3video_present_frame for
 * progressive sequences whose chroma format is CHROMA444.
 *
 * Not implemented: both arguments are ignored, nothing is written to the
 * output and 0 is always returned.
 */
int mpeg3video_ditherframe444(mpeg3video_t *video, unsigned char *src[])
{
	/* Placeholder: the arguments are deliberately unused. */
	(void)video;
	(void)src;

	return 0;
}
970 
/*
 * Convert the picture in src into video->output_rows, picking the converter
 * from the requested colour model:
 *
 *   MPEG3_RGB555    -> mpeg3video_ditherframeFastRGB555
 *   MPEG3_ARGB8888  -> mpeg3video_ditherframeFastRGBA
 *   anything else   -> mpeg3video_ditherframe
 *
 * These are the same converters, called with the same arguments, that
 * mpeg3video_present_frame uses for progressive sequences.
 *
 * Returns whatever the selected converter returns.
 */
int mpeg3video_dithertop(mpeg3video_t *video, unsigned char *src[])
{
	int status;

	switch(video->color_model)
	{
		case MPEG3_RGB555:
			status = mpeg3video_ditherframeFastRGB555(video, src, video->output_rows);
			break;

		case MPEG3_ARGB8888:
			status = mpeg3video_ditherframeFastRGBA(video, src, video->output_rows);
			break;

		default:
			status = mpeg3video_ditherframe(video, src, video->output_rows);
			break;
	}

	return status;
}
981 
/*
 * Counterpart of mpeg3video_dithertop for CHROMA444 material; called by
 * mpeg3video_present_frame on the non-progressive path.
 *
 * Not implemented: both arguments are ignored and 0 is always returned.
 */
int mpeg3video_dithertop444(mpeg3video_t *video, unsigned char *src[])
{
	/* Placeholder: the arguments are deliberately unused. */
	(void)video;
	(void)src;

	return 0;
}
986 
/*
 * Called by mpeg3video_present_frame next to mpeg3video_dithertop on the
 * non-progressive path (chroma format other than CHROMA444).
 *
 * Does nothing: both arguments are ignored and 0 is always returned, so all
 * of the actual conversion on that path is done by mpeg3video_dithertop.
 */
int mpeg3video_ditherbot(mpeg3video_t *video, unsigned char *src[])
{
	/* Placeholder: the arguments are deliberately unused. */
	(void)video;
	(void)src;

	return 0;
}
991 
/*
 * Counterpart of mpeg3video_ditherbot for CHROMA444 material; called by
 * mpeg3video_present_frame on the non-progressive path.
 *
 * Not implemented: both arguments are ignored and 0 is always returned.
 */
int mpeg3video_ditherbot444(mpeg3video_t *video, unsigned char *src[])
{
	/* Placeholder: the arguments are deliberately unused. */
	(void)video;
	(void)src;

	return 0;
}
996 
/*
 * Copy len bytes from input to output.
 *
 * This is a thin wrapper around memcpy(); an earlier hand-unrolled 64 bit
 * copy loop for 8-byte-aligned input had already been disabled and has been
 * removed.
 *
 * output  destination buffer, at least len bytes
 * input   source buffer, at least len bytes; must not overlap output
 *         (memcpy contract)
 * len     number of bytes to copy; zero or negative copies nothing
 */
void memcpy_fast(unsigned char *output, unsigned char *input, long len)
{
	/*
	 * len is signed but memcpy() takes a size_t: a negative length would be
	 * converted to an enormous unsigned count.  Treat it as "nothing to do".
	 */
	if(len <= 0) return;

	memcpy(output, input, (size_t)len);
}
1022 
mpeg3video_init_output()1023 int mpeg3video_init_output()
1024 {
1025 	int i, value;
1026 	for(i = 0; i < 256; i++)
1027 	{
1028 		value = (int)(1.1644 * i - 255 * 0.0627 + 0.5);
1029 		if(value < 0) value = 0;
1030 		else
1031 		if(value > 255) value = 255;
1032 		mpeg3_601_to_rgb[i] = value;
1033 	}
1034 	return 0;
1035 }
1036 
/*
 * Fast converter for 32 bit output (selected for MPEG3_ARGB8888).
 *
 * Converts the planar picture in src to packed pixels, one output row per
 * entry of output_rows.  Each pixel is written as one native-endian 32 bit
 * word with the layout 0xFFRRGGBB (alpha is forced to 0xFF).
 *
 * The conversion is 16.16 fixed point: luma is shifted up by 16, the
 * per-stream cr_to_r / cr_to_g / cb_to_g / cb_to_b table entries are added,
 * and the sum is shifted back down.  The result indexes the clip table
 * (gClipArray_ptr, valid for -512..512) instead of being clamped with
 * branches; mpeg3video_present_frame fills that table on its first call.
 *
 * video        decoder state: geometry, x/y scaling tables, colour tables
 * src          src[0] = Y plane, src[1] = Cb plane, src[2] = Cr plane
 * output_rows  video->out_h row pointers, each with room for video->out_w
 *              32 bit pixels (assumed suitably aligned for 32 bit stores)
 *
 * Always returns 0.
 */
int mpeg3video_ditherframeFastRGBA(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) {
	/* Opaque alpha in the top byte; kept unsigned so it never passes through a signed int. */
	const unsigned int alpha = 0xFF000000u;
	unsigned char *clip = gClipArray_ptr;
	long *r_from_cr = &video->cr_to_r[0];
	long *g_from_cr = &video->cr_to_g[0];
	long *b_from_cb = &video->cb_to_b[0];
	long *g_from_cb = &video->cb_to_g[0];
	int h, w;

	for(h = 0; h < video->out_h; h++)
	{
		int luma_row = video->y_table[h] + video->in_y;
		/* Chroma has one row per two luma rows. */
		int chroma_offset = (luma_row >> 1) * video->chrom_width;
		unsigned char *y_in = &src[0][luma_row * video->coded_picture_width] + video->in_x;
		/*
		 * Cb and Cr are indexed identically below (one sample per two luma
		 * columns), so both planes need the same horizontal start offset.
		 * The Cb offset used to be in_x >> 2, which pointed at the wrong
		 * sample whenever in_x >= 2.
		 */
		unsigned char *cb_in = &src[1][chroma_offset] + (video->in_x >> 1);
		unsigned char *cr_in = &src[2][chroma_offset] + (video->in_x >> 1);
		/*
		 * Pixels are 32 bits wide.  "unsigned long" is 64 bits on LP64
		 * targets and would store 8 bytes per pixel, running past the end
		 * of the row; "unsigned int" is 32 bits on ILP32, LP64 and LLP64.
		 */
		unsigned int *data = (unsigned int *)output_rows[h];
		long r_l, g_l, b_l;

		if(video->out_w == video->horizontal_size)
		{
			/* Unscaled: walk the source row sample by sample. */
			for(w = 0; w < video->horizontal_size; w++)
			{
				r_l = g_l = b_l = (long)(*y_in++) << 16;
				g_l = (g_l + g_from_cr[*cr_in] + g_from_cb[*cb_in]) >> 16;
				r_l = (r_l + r_from_cr[*cr_in]) >> 16;
				b_l = (b_l + b_from_cb[*cb_in]) >> 16;
				*data++ = alpha |
					((unsigned int)clip[r_l] << 16) |
					((unsigned int)clip[g_l] << 8) |
					(unsigned int)clip[b_l];

				/* Advance chroma after every second luma sample. */
				if(w & 1)
				{
					cr_in++;
					cb_in++;
				}
			}
		}
		else
		{
			/* Scaled: x_table maps each output column to a source column. */
			for(w = 0; w < video->out_w; w++)
			{
				int src_x = video->x_table[w];
				int cr = cr_in[src_x / 2];
				int cb = cb_in[src_x / 2];

				r_l = g_l = b_l = (long)y_in[src_x] << 16;
				g_l = (g_l + g_from_cr[cr] + g_from_cb[cb]) >> 16;
				r_l = (r_l + r_from_cr[cr]) >> 16;
				b_l = (b_l + b_from_cb[cb]) >> 16;
				*data++ = alpha |
					((unsigned int)clip[r_l] << 16) |
					((unsigned int)clip[g_l] << 8) |
					(unsigned int)clip[b_l];
			}
		}
	}

	return 0;
}
1090 
/*
 * Fast converter for 16 bit output (selected for MPEG3_RGB555).
 *
 * Converts the planar picture in src to packed pixels, one output row per
 * entry of output_rows.  Each pixel is one native-endian 16 bit word built
 * by OR-ing three pre-shifted lookups: red in bits 10-14, green in bits 5-9,
 * blue in bits 0-4 (see the table setup in mpeg3video_present_frame, which
 * also maps a zero blue field to 1).
 *
 * The conversion is 16.16 fixed point: luma is shifted up by 16, the
 * per-stream cr_to_r / cr_to_g / cb_to_g / cb_to_b table entries are added,
 * and the sum is shifted back down to index the clip tables (valid for
 * -512..512).  mpeg3video_present_frame fills those tables on its first call.
 *
 * video        decoder state: geometry, x/y scaling tables, colour tables
 * src          src[0] = Y plane, src[1] = Cb plane, src[2] = Cr plane
 * output_rows  video->out_h row pointers, each with room for video->out_w
 *              16 bit pixels (assumed suitably aligned for 16 bit stores)
 *
 * Always returns 0.
 */
int mpeg3video_ditherframeFastRGB555(mpeg3video_t *video, unsigned char **src, unsigned char **output_rows) {
	unsigned short *clip_r = gClipArray_ptr16r;
	unsigned short *clip_g = gClipArray_ptr16g;
	unsigned short *clip_b = gClipArray_ptr16b;
	long *r_from_cr = &video->cr_to_r[0];
	long *g_from_cr = &video->cr_to_g[0];
	long *b_from_cb = &video->cb_to_b[0];
	long *g_from_cb = &video->cb_to_g[0];
	int full_width = video->horizontal_size;
	int h, w;

	for(h = 0; h < video->out_h; h++)
	{
		int luma_row = video->y_table[h] + video->in_y;
		/* Chroma has one row per two luma rows. */
		int chroma_offset = (luma_row >> 1) * video->chrom_width;
		unsigned char *y_in = &src[0][luma_row * video->coded_picture_width] + video->in_x;
		/*
		 * Cb and Cr are indexed identically below (one sample per two luma
		 * columns), so both planes need the same horizontal start offset.
		 * The Cb offset used to be in_x >> 2, which pointed at the wrong
		 * sample whenever in_x >= 2.
		 */
		unsigned char *cb_in = &src[1][chroma_offset] + (video->in_x >> 1);
		unsigned char *cr_in = &src[2][chroma_offset] + (video->in_x >> 1);
		unsigned short *data = (unsigned short *)output_rows[h];
		long r_l, g_l, b_l;

		if(video->out_w == full_width)
		{
			/* Unscaled: walk the source row sample by sample. */
			for(w = 0; w < full_width; w++)
			{
				r_l = g_l = b_l = (long)(*y_in++) << 16;
				g_l = (g_l + g_from_cr[*cr_in] + g_from_cb[*cb_in]) >> 16;
				r_l = (r_l + r_from_cr[*cr_in]) >> 16;
				b_l = (b_l + b_from_cb[*cb_in]) >> 16;
				*data++ = clip_r[r_l] | clip_g[g_l] | clip_b[b_l];

				/* Advance chroma after every second luma sample. */
				if(w & 1)
				{
					cr_in++;
					cb_in++;
				}
			}
		}
		else
		{
			/* Scaled: x_table maps each output column to a source column. */
			for(w = 0; w < video->out_w; w++)
			{
				int src_x = video->x_table[w];
				int cr = cr_in[src_x / 2];
				int cb = cb_in[src_x / 2];

				r_l = g_l = b_l = (long)y_in[src_x] << 16;
				g_l = (g_l + g_from_cr[cr] + g_from_cb[cb]) >> 16;
				r_l = (r_l + r_from_cr[cr]) >> 16;
				b_l = (b_l + b_from_cb[cb]) >> 16;
				*data++ = clip_r[r_l] | clip_g[g_l] | clip_b[b_l];
			}
		}
	}

	return 0;
}
1153 
mpeg3video_present_frame(mpeg3video_t * video)1154 int mpeg3video_present_frame(mpeg3video_t *video)
1155 {
1156 	int i, j, k, l, h;
1157 	unsigned char **src = video->output_src;
1158 
1159 	if (doClippingArrays) {
1160 		for(h=-512;h<=512;h++) {
1161 			gClipArray_ptr[h]=CLIP(h);
1162 			gClipArray_ptr16r[h]=(CLIP(h) & 0xf8) << 7;
1163 			gClipArray_ptr16g[h]=(CLIP(h) & 0xf8) << 2;
1164 			gClipArray_ptr16b[h]=(CLIP(h) & 0xf8) >> 3;
1165 			if (gClipArray_ptr[h] == 0x00)
1166 				gClipArray_ptr[h] = 0x01;
1167 			if (gClipArray_ptr16b[h] == 0x00)
1168 				gClipArray_ptr16b[h] = 0x01;
1169 		}
1170 		doClippingArrays = 0;
1171 	}
1172 
1173 /* Copy YUV buffers */
1174 	if(video->want_yvu)
1175 	{
1176 		long size[2];
1177 		long offset[2];
1178 
1179 /* Drop a frame */
1180 		if(!video->y_output) return 0;
1181 
1182 /* Copy a frame */
1183 		if(video->in_x == 0 &&
1184 			video->in_w >= video->coded_picture_width)
1185 		{
1186 			size[0] = video->coded_picture_width * video->in_h;
1187 			size[1] = video->chrom_width * (int)((float)video->in_h / 2 + 0.5);
1188 			offset[0] = video->coded_picture_width * video->in_y;
1189 			offset[1] = video->chrom_width * (int)((float)video->in_y / 2 + 0.5);
1190 
1191 /*
1192  * 			if(video->in_y > 0)
1193  * 			{
1194  * 				offset[1] += video->chrom_width / 2;
1195  * 				size[1] += video->chrom_width / 2;
1196  * 			}
1197  */
1198 
1199 			memcpy(video->y_output, src[0] + offset[0], size[0]);
1200 			memcpy(video->u_output, src[1] + offset[1], size[1]);
1201 			memcpy(video->v_output, src[2] + offset[1], size[1]);
1202 		}
1203 		else
1204 		{
1205 			for(i = 0, j = video->in_y; i < video->in_h; i++, j++)
1206 			{
1207 				memcpy(video->y_output + i * video->in_w,
1208 					src[0] + j * video->coded_picture_width + video->in_x,
1209 					video->in_w);
1210 				memcpy(video->u_output + i * video->in_w / 4,
1211 					src[1] + j * video->chrom_width / 2 + video->in_x / 4,
1212 					video->in_w / 4);
1213 				memcpy(video->v_output + i * video->in_w / 4,
1214 					src[2] + j * video->chrom_width / 2 + video->in_x / 4,
1215 					video->in_w / 4);
1216 			}
1217 		}
1218 
1219 		return 0;
1220 	}
1221 
1222 /* Want RGB buffer */
1223 /* Copy the frame to the output with YUV to RGB conversion */
1224   	if(video->prog_seq)
1225 	{
1226     	if(video->chroma_format != CHROMA444)
1227 		{
1228     		if (video->color_model == MPEG3_RGB555)
1229         	    mpeg3video_ditherframeFastRGB555(video, src, video->output_rows);
1230     		else
1231     		    if (video->color_model == MPEG3_ARGB8888)
1232         	      mpeg3video_ditherframeFastRGBA(video, src, video->output_rows);
1233     		    else
1234     		mpeg3video_ditherframe(video, src, video->output_rows);
1235     	}
1236     	else
1237     	  	mpeg3video_ditherframe444(video, src);
1238   	}
1239 	else
1240 	{
1241    		if((video->pict_struct == FRAME_PICTURE && video->topfirst) ||
1242 			video->pict_struct == BOTTOM_FIELD)
1243 		{
1244 /* top field first */
1245     		if(video->chroma_format != CHROMA444)
1246 			{
1247         		mpeg3video_dithertop(video, src);
1248         		mpeg3video_ditherbot(video, src);
1249     		}
1250     		else
1251 			{
1252         		mpeg3video_dithertop444(video, src);
1253         		mpeg3video_ditherbot444(video, src);
1254     		}
1255     	}
1256     	else
1257 		{
1258 /* bottom field first */
1259     		if(video->chroma_format != CHROMA444)
1260 			{
1261         		mpeg3video_ditherbot(video, src);
1262         		mpeg3video_dithertop(video, src);
1263     		}
1264     		else
1265 			{
1266         		mpeg3video_ditherbot444(video, src);
1267         		mpeg3video_dithertop444(video, src);
1268     		}
1269     	}
1270   	}
1271 	return 0;
1272 }
1273 
/*
 * Not used.  Kept as a no-op: the argument is ignored and 0 is always
 * returned.
 */
int mpeg3video_display_second_field(mpeg3video_t *video)
{
	/* Placeholder: the argument is deliberately unused. */
	(void)video;

	return 0;
}
1279 
1280