1 // VisualBoyAdvance - Nintendo Gameboy/GameboyAdvance (TM) emulator.
2 // Copyright (C) 1999-2003 Forgotten
3 // Copyright (C) 2005 Forgotten and the VBA development team
4
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2, or(at your option)
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18
19 #include "hq_shared32.h"
20
21 const unsigned __int64 reg_blank = 0x0000000000000000;
22 const unsigned __int64 const7 = 0x0000000700070007;
23 const unsigned __int64 treshold = 0x0000000000300706;
24
Interp1(unsigned char * pc,unsigned int c1,unsigned int c2)25 void Interp1(unsigned char * pc, unsigned int c1, unsigned int c2)
26 {
27 //*((int*)pc) = (c1*3+c2)/4;
28
29 #ifdef MMX
30 __asm
31 {
32 mov eax, pc
33 movd mm1, c1
34 movd mm2, c2
35 movq mm0, mm1
36 pslld mm0, 2
37 psubd mm0, mm1
38 paddd mm0, mm2
39 psrld mm0, 2
40 movd [eax], mm0
41 EMMS
42 }
43 #else
44 __asm
45 {
46 mov eax, pc
47 mov edx, c1
48 shl edx, 2
49 add edx, c2
50 sub edx, c1
51 shr edx, 2
52 mov [eax], edx
53 }
54 #endif
55 }
56
Interp2(unsigned char * pc,unsigned int c1,unsigned int c2,unsigned int c3)57 void Interp2(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
58 {
59 //*((int*)pc) = (c1*2+c2+c3)/4;
60
61 #ifdef MMX
62 __asm
63 {
64 mov eax, pc
65 movd mm0, c1
66 movd mm1, c2
67 movd mm2, c3
68 pslld mm0, 1
69 paddd mm0, mm1
70 paddd mm0, mm2
71 psrad mm0, 2
72 movd [eax], mm0
73 EMMS
74 }
75 #else
76 __asm
77 {
78 mov eax, pc
79 mov edx, c1
80 shl edx, 1
81 add edx, c2
82 add edx, c3
83 shr edx, 2
84 mov [eax], edx
85 }
86 #endif
87 }
88
Interp3(unsigned char * pc,unsigned int c1,unsigned int c2)89 void Interp3(unsigned char * pc, unsigned int c1, unsigned int c2)
90 {
91 //*((int*)pc) = (c1*7+c2)/8;
92 //*((int*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
93 // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
94
95 #ifdef MMX
96 __asm
97 {
98 mov eax, pc
99 movd mm1, c1
100 movd mm2, c2
101 punpcklbw mm1, reg_blank
102 punpcklbw mm2, reg_blank
103 pmullw mm1, const7
104 paddw mm1, mm2
105 psrlw mm1, 3
106 packuswb mm1, reg_blank
107 movd [eax], mm1
108 EMMS
109 }
110 #else
111 __asm
112 {
113 mov eax, c1
114 mov ebx, c2
115 mov ecx, eax
116 shl ecx, 3
117 sub ecx, eax
118 add ecx, ebx
119 shr ecx, 3
120 mov eax, pc
121 mov [eax], ecx
122 }
123 #endif
124 }
125
Interp4(unsigned char * pc,unsigned int c1,unsigned int c2,unsigned int c3)126 void Interp4(unsigned char * pc, unsigned int c1, unsigned int c2, unsigned int c3)
127 {
128 //*((int*)pc) = (c1*2+(c2+c3)*7)/16;
129 //*((int*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
130 // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
131
132 #ifdef MMX
133 __asm
134 {
135 mov eax, pc
136 movd mm1, c1
137 movd mm2, c2
138 movd mm3, c3
139 punpcklbw mm1, reg_blank
140 punpcklbw mm2, reg_blank
141 punpcklbw mm3, reg_blank
142 psllw mm1, 1
143 paddw mm2, mm3
144 pmullw mm2, const7
145 paddw mm1, mm2
146 psrlw mm1, 4
147 packuswb mm1, reg_blank
148 movd [eax], mm1
149 EMMS
150 }
151 #else
152
153 __asm
154 {
155 mov eax, [c1]
156 and eax, 0FF00h
157 shl eax, 1
158 mov ecx, [c2]
159 and ecx, 0FF00h
160 mov edx, [c3]
161 and edx, 0FF00h
162 add ecx, edx
163 imul ecx, ecx,7
164 add eax, ecx
165 and eax, 0FF000h
166
167 mov ebx, [c1]
168 and ebx, 0FF00FFh
169 shl ebx, 1
170 mov ecx, [c2]
171 and ecx, 0FF00FFh
172 mov edx, [c3]
173 and edx, 0FF00FFh
174 add ecx, edx
175 imul ecx, ecx,7
176 add ebx, ecx
177 and ebx, 0FF00FF0h
178
179 add eax, ebx
180 shr eax, 4
181
182 mov ebx, pc
183 mov [ebx], eax
184 }
185 #endif
186 }
187
Interp5(unsigned char * pc,unsigned int c1,unsigned int c2)188 void Interp5(unsigned char * pc, unsigned int c1, unsigned int c2)
189 {
190 //*((int*)pc) = (c1+c2)/2;
191
192 #ifdef MMX
193 __asm
194 {
195 mov eax, pc
196 movd mm0, c1
197 movd mm1, c2
198 paddd mm0, mm1
199 psrad mm0, 1
200 movd [eax], mm0
201 EMMS
202 }
203 #else
204 __asm
205 {
206 mov eax, pc
207 mov edx, c1
208 add edx, c2
209 shr edx, 1
210 mov [eax], edx
211 }
212 #endif
213 }
214
215
216 #include "interp.h"
217
Interp1_16(unsigned char * pc,unsigned short c1,unsigned short c2)218 void Interp1_16(unsigned char * pc, unsigned short c1, unsigned short c2)
219 {
220 *((unsigned short*)pc) = interp_16_31(c1, c2);
221 //*((int*)pc) = (c1*3+c2)/4;
222 }
223
Interp2_16(unsigned char * pc,unsigned short c1,unsigned short c2,unsigned short c3)224 void Interp2_16(unsigned char * pc, unsigned short c1, unsigned short c2, unsigned short c3)
225 {
226 *((unsigned short*)pc) = interp_16_211(c1, c2, c3);
227 //*((int*)pc) = (c1*2+c2+c3)/4;
228 }
229
Interp3_16(unsigned char * pc,unsigned short c1,unsigned short c2)230 void Interp3_16(unsigned char * pc, unsigned short c1, unsigned short c2)
231 {
232 *((unsigned short*)pc) = interp_16_71(c1, c2);
233 // *((unsigned short*)pc) = (c1*7+c2)/8;
234 // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*7 + (c2 & 0x00FF00) ) & 0x0007F800) +
235 // (((c1 & 0xFF00FF)*7 + (c2 & 0xFF00FF) ) & 0x07F807F8)) >> 3;
236 }
237
Interp4_16(unsigned char * pc,unsigned short c1,unsigned short c2,unsigned short c3)238 void Interp4_16(unsigned char * pc, unsigned short c1, unsigned short c2, unsigned short c3)
239 {
240 *((unsigned short*)pc) = interp_16_772(c2, c3, c1);
241 // *((unsigned short*)pc) = (c1*2+(c2+c3)*7)/16;
242 // *((unsigned short*)pc) = ((((c1 & 0x00FF00)*2 + ((c2 & 0x00FF00) + (c3 & 0x00FF00))*7 ) & 0x000FF000) +
243 // (((c1 & 0xFF00FF)*2 + ((c2 & 0xFF00FF) + (c3 & 0xFF00FF))*7 ) & 0x0FF00FF0)) >> 4;
244 }
245
Interp5_16(unsigned char * pc,unsigned short c1,unsigned short c2)246 void Interp5_16(unsigned char * pc, unsigned short c1, unsigned short c2)
247 {
248 *((unsigned short*)pc) = interp_16_11(c1, c2);
249 }
250
251
252
253
Diff(unsigned int c1,unsigned int c2)254 bool Diff(unsigned int c1, unsigned int c2)
255 {
256 unsigned int
257 YUV1 = RGBtoYUV(c1),
258 YUV2 = RGBtoYUV(c2);
259
260 if (YUV1 == YUV2) return false; // Save some processing power
261
262 #ifdef MMX
263 unsigned int retval;
264 __asm
265 {
266 mov eax, 0x7FFFFFFF
267 movd mm7, eax ;mm7 = ABS_MASK = 0x7FFFFFFF
268
269 ; Copy source colors in first reg
270 movd mm0, YUV1
271 movd mm1, YUV2
272
273 mov eax, 0x00FF0000
274 movd mm6, eax ;mm6 = Ymask = 0x00FF0000
275
276 ; Calculate color Y difference
277 movq mm2, mm0
278 movq mm3, mm1
279 pand mm2, mm6
280 pand mm3, mm6
281 psubd mm2, mm3
282 pand mm2, mm7
283
284 mov eax, 0x0000FF00
285 movd mm6, eax ;mm6 = Umask = 0x0000FF00
286
287 ; Calculate color U difference
288 movq mm3, mm0
289 movq mm4, mm1
290 pand mm3, mm6
291 pand mm4, mm6
292 psubd mm3, mm4
293 pand mm3, mm7
294
295 mov eax, 0x000000FF
296 movd mm6, eax ;mm6 = Vmask = 0x000000FF
297
298 ; Calculate color V difference
299 movq mm4, mm0
300 movq mm5, mm1
301 pand mm4, mm6
302 pand mm5, mm6
303 psubd mm4, mm5
304 pand mm4, mm7
305
306 mov eax, 0x00300000
307 movd mm5, eax ;mm5 = trY = 0x00300000
308 mov eax, 0x00000700
309 movd mm6, eax ;mm6 = trU = 0x00000700
310 mov eax, 0x00000006
311 movd mm7, eax ;mm7 = trV = 0x00000006
312
313 ; Compare the results
314 pcmpgtd mm2, trY
315 pcmpgtd mm3, trU
316 pcmpgtd mm4, trV
317 por mm2, mm3
318 por mm2, mm4
319
320 movd retval, mm2
321
322 EMMS
323 }
324 return (retval != 0);
325 #else
326 return
327 ( abs32((YUV1 & Ymask) - (YUV2 & Ymask)) > trY ) ||
328 ( abs32((YUV1 & Umask) - (YUV2 & Umask)) > trU ) ||
329 ( abs32((YUV1 & Vmask) - (YUV2 & Vmask)) > trV );
330 #endif
331 }
332
333
RGBtoYUV(unsigned int c)334 unsigned int RGBtoYUV(unsigned int c)
335 { // Division through 3 slows down the emulation about 10% !!!
336 #ifdef MMX
337 unsigned int retval;
338 __asm
339 {
340 movd mm0, c
341 movq mm1, mm0
342 movq mm2, mm0 ;mm0=mm1=mm2=c
343
344 mov eax, 0x000000FF
345 movd mm5, eax ;mm5 = REDMASK = 0x000000FF
346 mov eax, 0x0000FF00
347 movd mm6, eax ;mm6 = GREENMASK = 0x0000FF00
348 mov eax, 0x00FF0000
349 movd mm7, eax ;mm7 = BLUEMASK = 0x00FF0000
350
351
352 pand mm0, mm5
353 pand mm1, mm6
354 pand mm2, mm7 ;mm0=R mm1=G mm2=B
355
356 movq mm3, mm0
357 paddd mm3, mm1
358 paddd mm3, mm2
359 ; psrld mm3, 2 ;mm3=Y
360 ; pslld mm3, 16
361 pslld mm3, 14 ;mm3=Y<<16
362
363 mov eax, 512
364 movd mm7, eax ;mm7 = 128 << 2 = 512
365
366 movq mm4, mm0
367 psubd mm4, mm2
368 ; psrld mm4, 2
369 ; paddd mm4, mm7 ;mm4=U
370 ; pslld mm4, 8 ;mm4=U<<8
371 paddd mm4, mm7
372 pslld mm4, 6
373
374 mov eax, 128
375 movd mm7, eax ;mm7 = 128
376
377 movq mm5, mm1
378 pslld mm5, 1
379 psubd mm5, mm0
380 psubd mm5, mm2
381 psrld mm5, 3
382 paddd mm5, mm7 ;mm5=V
383
384 paddd mm5, mm4
385 paddd mm5, mm3
386
387 movd retval, mm5
388
389 EMMS
390 }
391 return retval;
392 #else
393 unsigned char r, g, b, Y, u, v;
394 r = (c & 0x000000FF);
395 g = (c & 0x0000FF00) >> 8;
396 b = (c & 0x00FF0000) >> 16;
397 Y = (r + g + b) >> 2;
398 u = 128 + ((r - b) >> 2);
399 v = 128 + ((-r + 2*g -b)>>3);
400 return (Y<<16) + (u<<8) + v;
401
402 // Extremely High Quality Code
403 //unsigned char r, g, b;
404 //r = c & 0xFF;
405 //g = (c >> 8) & 0xFF;
406 //b = (c >> 16) & 0xFF;
407 //unsigned char y, u, v;
408 //y = (0.256788 * r + 0.504129 * g + 0.097906 * b) + 16;
409 //u = (-0.148223 * r - 0.290993 * g + 0.439216 * b) + 128;
410 //v = (0.439216 * r - 0.367788 * g - 0.071427 * b) + 128;
411 //return (y << 16) + (u << 8) + v;
412 #endif
413 }
414