1 // VisualBoyAdvance - Nintendo Gameboy/GameboyAdvance (TM) emulator.
2 // Copyright (C) 1999-2003 Forgotten
3 // Copyright (C) 2005 Forgotten and the VBA development team
4 
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2, or(at your option)
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 
19 #include "hq_shared32.h"
20 
21 const unsigned __int64 reg_blank = 0x0000000000000000;
22 const unsigned __int64 const7    = 0x0000000700070007;
23 const unsigned __int64 treshold  = 0x0000000000300706;
24 
Interp1(unsigned char * pc,unsigned int c1,unsigned int c2)25 void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
26 {
27 	//*((int*)pc) = (c1*3+c2)/4;
28 
29 #ifdef MMX
30 	__asm
31 	{
32 		mov		eax, pc
33 		movd	mm1, c1
34 		movd	mm2, c2
35 		movq	mm0, mm1
36 		pslld	mm0, 2
37 		psubd	mm0, mm1
38 		paddd	mm0, mm2
39 		psrld	mm0, 2
40 		movd	[eax], mm0
41 		EMMS
42 	}
43 #else
44 	__asm
45 	{
46 		mov        eax, pc
47 		mov        edx, c1
48 		shl        edx, 2
49 		add        edx, c2
50 		sub        edx, c1
51 		shr        edx, 2
52 		mov        [eax], edx
53 	}
54 #endif
55 }
56 
Interp2(unsigned char * pc,unsigned int c1,unsigned int c2,unsigned int c3)57 void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
58 {
59 	//*((int*)pc) = (c1*2+c2+c3)/4;
60 
61 #ifdef MMX
62 	__asm
63 	{
64 		mov		eax, pc
65 		movd	mm0, c1
66 		movd	mm1, c2
67 		movd	mm2, c3
68 		pslld	mm0, 1
69 		paddd	mm0, mm1
70 		paddd	mm0, mm2
71 		psrad	mm0, 2
72 		movd [eax], mm0
73 		EMMS
74 	}
75 #else
76 	__asm
77 	{
78 		mov        eax, pc
79 		mov        edx, c1
80 		shl        edx, 1
81 		add        edx, c2
82 		add        edx, c3
83 		shr        edx, 2
84 		mov        [eax], edx
85 	}
86 #endif
87 }
88 
Interp3(unsigned char * pc,unsigned int c1,unsigned int c2)89 void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
90 {
91 	//*((int*)pc) = (c1*7+c2)/8;
92 	//*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
93 	//	            (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
94 
95 #ifdef MMX
96 	__asm
97 	{
98 		mov        eax, pc
99 		movd       mm1, c1
100 		movd       mm2, c2
101 		punpcklbw  mm1, reg_blank
102 		punpcklbw  mm2, reg_blank
103 		pmullw     mm1, const7
104 		paddw      mm1, mm2
105 		psrlw      mm1, 3
106 		packuswb   mm1, reg_blank
107 		movd       [eax], mm1
108 		EMMS
109 	}
110 #else
111 	__asm
112 	{
113 		mov		eax, c1
114 		mov		ebx, c2
115 		mov		ecx, eax
116 		shl		ecx, 3
117 		sub		ecx, eax
118 		add		ecx, ebx
119 		shr		ecx, 3
120 		mov		eax, pc
121 		mov		[eax], ecx
122 	}
123 #endif
124 }
125 
Interp4(unsigned char * pc,unsigned int c1,unsigned int c2,unsigned int c3)126 void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
127 {
128 	//*((int*)pc) = (c1*2+(c2+c3)*7)/16;
129 	//*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
130 	//              (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
131 
132 #ifdef MMX
133 	__asm
134 	{
135 		mov        eax, pc
136 		movd       mm1, c1
137 		movd       mm2, c2
138 		movd       mm3, c3
139 		punpcklbw  mm1, reg_blank
140 		punpcklbw  mm2, reg_blank
141 		punpcklbw  mm3, reg_blank
142 		psllw      mm1, 1
143 		paddw      mm2, mm3
144 		pmullw     mm2, const7
145 		paddw      mm1, mm2
146 		psrlw      mm1, 4
147 		packuswb   mm1, reg_blank
148 		movd       [eax], mm1
149 		EMMS
150 	}
151 #else
152 
153 	__asm
154 	{
155 		mov		eax, [c1]
156 		and		eax, 0FF00h
157 		shl		eax, 1
158 		mov		ecx, [c2]
159 		and		ecx, 0FF00h
160 		mov		edx, [c3]
161 		and		edx, 0FF00h
162 		add		ecx, edx
163 		imul	ecx, ecx,7
164 		add		eax, ecx
165 		and		eax, 0FF000h
166 
167 		mov		ebx, [c1]
168 		and		ebx, 0FF00FFh
169 		shl		ebx, 1
170 		mov		ecx, [c2]
171 		and		ecx, 0FF00FFh
172 		mov		edx, [c3]
173 		and		edx, 0FF00FFh
174 		add		ecx, edx
175 		imul	ecx, ecx,7
176 		add		ebx, ecx
177 		and		ebx, 0FF00FF0h
178 
179 		add		eax, ebx
180 		shr		eax, 4
181 
182 		mov		ebx, pc
183 		mov		[ebx], eax
184 	}
185 #endif
186 }
187 
Interp5(unsigned char * pc,unsigned int c1,unsigned int c2)188 void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
189 {
190 	//*((int*)pc) = (c1+c2)/2;
191 
192 #ifdef MMX
193 	__asm
194 	{
195 		mov		eax, pc
196 		movd	mm0, c1
197 		movd	mm1, c2
198 		paddd	mm0, mm1
199 		psrad	mm0, 1
200 		movd	[eax], mm0
201 		EMMS
202 	}
203 #else
204 	__asm
205 	{
206 		mov        eax, pc
207 		mov        edx, c1
208 		add        edx, c2
209 		shr        edx, 1
210 		mov        [eax], edx
211 	}
212 #endif
213 }
214 
215 
216 #include "interp.h"
217 
Interp1_16(unsigned char * pc,unsigned short c1,unsigned short c2)218 void Interp1_16(unsigned char * pc, unsigned short c1, unsigned short c2)
219 {
220 	*((unsigned short*)pc) = interp_16_31(c1, c2);
221 	//*((int*)pc) = (c1*3+c2)/4;
222 }
223 
Interp2_16(unsigned char * pc,unsigned short c1,unsigned short c2,unsigned short c3)224 void Interp2_16(unsigned char * pc, unsigned short c1, unsigned short c2, unsigned short c3)
225 {
226 	*((unsigned short*)pc) = interp_16_211(c1, c2, c3);
227 	//*((int*)pc) = (c1*2+c2+c3)/4;
228 }
229 
Interp3_16(unsigned char * pc,unsigned short c1,unsigned short c2)230 void Interp3_16(unsigned char * pc, unsigned short c1, unsigned short c2)
231 {
232 	*((unsigned short*)pc) = interp_16_71(c1, c2);
233 //	*((unsigned short*)pc) = (c1*7+c2)/8;
234 //	*((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
235 //		            (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
236 }
237 
Interp4_16(unsigned char * pc,unsigned short c1,unsigned short c2,unsigned short c3)238 void Interp4_16(unsigned char * pc, unsigned short c1, unsigned short c2, unsigned short c3)
239 {
240 	*((unsigned short*)pc) = interp_16_772(c2, c3, c1);
241 //	*((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16;
242 //	*((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
243 //	              (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
244 }
245 
Interp5_16(unsigned char * pc,unsigned short c1,unsigned short c2)246 void Interp5_16(unsigned char * pc, unsigned short c1, unsigned short c2)
247 {
248 	*((unsigned short*)pc) = interp_16_11(c1, c2);
249 }
250 
251 
252 
253 
Diff(unsigned int c1,unsigned int c2)254 bool Diff(unsigned int c1, unsigned int c2)
255 {
256 	unsigned int
257 		YUV1 = RGBtoYUV(c1),
258 		YUV2 = RGBtoYUV(c2);
259 
260 	if (YUV1 == YUV2) return false; // Save some processing power
261 
262 #ifdef MMX
263 	unsigned int retval;
264 	__asm
265 	{
266    		mov		eax, 0x7FFFFFFF
267 		movd	mm7, eax			;mm7 = ABS_MASK = 0x7FFFFFFF
268 
269 		; Copy source colors in first reg
270 		movd	mm0, YUV1
271 		movd	mm1, YUV2
272 
273 		mov		eax, 0x00FF0000
274 		movd	mm6, eax			;mm6 = Ymask = 0x00FF0000
275 
276 		; Calculate color Y difference
277 		movq	mm2, mm0
278 		movq	mm3, mm1
279 		pand	mm2, mm6
280 		pand	mm3, mm6
281 		psubd	mm2, mm3
282 		pand	mm2, mm7
283 
284 		mov		eax, 0x0000FF00
285 		movd	mm6, eax			;mm6 = Umask = 0x0000FF00
286 
287 		; Calculate color U difference
288 		movq	mm3, mm0
289 		movq	mm4, mm1
290 		pand	mm3, mm6
291 		pand	mm4, mm6
292 		psubd	mm3, mm4
293 		pand	mm3, mm7
294 
295 		mov		eax, 0x000000FF
296 		movd	mm6, eax			;mm6 = Vmask = 0x000000FF
297 
298 		; Calculate color V difference
299 		movq	mm4, mm0
300 		movq	mm5, mm1
301 		pand	mm4, mm6
302 		pand	mm5, mm6
303 		psubd	mm4, mm5
304 		pand	mm4, mm7
305 
306    		mov		eax, 0x00300000
307 		movd	mm5, eax			;mm5 = trY = 0x00300000
308    		mov		eax, 0x00000700
309 		movd	mm6, eax			;mm6 = trU = 0x00000700
310    		mov		eax, 0x00000006
311 		movd	mm7, eax			;mm7 = trV = 0x00000006
312 
313 		; Compare the results
314         pcmpgtd	mm2, trY
315         pcmpgtd	mm3, trU
316         pcmpgtd	mm4, trV
317 		por		mm2, mm3
318 		por		mm2, mm4
319 
320 		movd	retval, mm2
321 
322 		EMMS
323 	}
324 	return (retval != 0);
325 #else
326 	return
327 		( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
328 		( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
329 		( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
330 #endif
331 }
332 
333 
RGBtoYUV(unsigned int c)334 unsigned int RGBtoYUV(unsigned int c)
335 {	// Division through 3 slows down the emulation about 10% !!!
336 #ifdef MMX
337 	unsigned int retval;
338 	__asm
339 	{
340 		movd	mm0, c
341 		movq	mm1, mm0
342 		movq	mm2, mm0		;mm0=mm1=mm2=c
343 
344 		mov		eax, 0x000000FF
345 		movd	mm5, eax		;mm5 = REDMASK = 0x000000FF
346 		mov		eax, 0x0000FF00
347 		movd	mm6, eax		;mm6 = GREENMASK = 0x0000FF00
348 		mov		eax, 0x00FF0000
349 		movd	mm7, eax		;mm7 = BLUEMASK = 0x00FF0000
350 
351 
352 		pand	mm0, mm5
353 		pand	mm1, mm6
354 		pand	mm2, mm7		;mm0=R mm1=G mm2=B
355 
356 		movq	mm3, mm0
357 		paddd	mm3, mm1
358 		paddd	mm3, mm2
359 ;		psrld	mm3, 2			;mm3=Y
360 ;		pslld	mm3, 16
361 		pslld	mm3, 14			;mm3=Y<<16
362 
363 		mov		eax, 512
364 		movd	mm7, eax		;mm7 = 128 << 2 = 512
365 
366 		movq	mm4, mm0
367 		psubd	mm4, mm2
368 ;		psrld	mm4, 2
369 ;		paddd	mm4, mm7		;mm4=U
370 ;		pslld	mm4, 8			;mm4=U<<8
371 		paddd	mm4, mm7
372 		pslld	mm4, 6
373 
374 		mov		eax, 128
375 		movd	mm7, eax		;mm7 = 128
376 
377 		movq	mm5, mm1
378 		pslld	mm5, 1
379 		psubd	mm5, mm0
380 		psubd	mm5, mm2
381 		psrld	mm5, 3
382 		paddd	mm5, mm7		;mm5=V
383 
384 		paddd	mm5, mm4
385 		paddd	mm5, mm3
386 
387         movd	retval, mm5
388 
389 		EMMS
390 	}
391 	return retval;
392 #else
393 	unsigned char r, g, b, Y, u, v;
394 	r = (c & 0x000000FF);
395 	g = (c & 0x0000FF00) >> 8;
396 	b = (c & 0x00FF0000) >> 16;
397 	Y = (r + g + b) >> 2;
398 	u = 128 + ((r - b) >> 2);
399 	v = 128 + ((-r + 2*g -b)>>3);
400 	return (Y<<16) + (u<<8) + v;
401 
402 	// Extremely High Quality Code
403 	//unsigned char r, g, b;
404 	//r = c & 0xFF;
405 	//g = (c >> 8) & 0xFF;
406 	//b = (c >> 16) & 0xFF;
407 	//unsigned char y, u, v;
408 	//y = (0.256788 * r  +  0.504129 * g  +  0.097906 * b) + 16;
409 	//u = (-0.148223 * r  -  0.290993 * g  +  0.439216 * b) + 128;
410 	//v = (0.439216 * r  -  0.367788 * g  -  0.071427 * b) + 128;
411 	//return (y << 16) + (u << 8) + v;
412 #endif
413 }
414