1 /*
2 VisualBoyAdvance - a Game Boy & Game Boy Advance emulator
3
4 Copyright (C) 2008 VBA-M development team
5
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20
21 hq filter by Maxim Stepin ( http://hiend3d.com )
22 */
23
// Channel masks for 16-bit pixels.
// Green is kept apart from red/blue so that weighted sums of the packed
// channels cannot carry into a neighbouring channel.
#ifdef RGB555
// RGB555: 5 bits for green
#define GMASK  0x03E0
#define RBMASK 0x7C1F
#else
// RGB565: 6 bits for green
#define GMASK  0x07E0
#define RBMASK 0xF81F
#endif

// xSHIFTnMASK is the matching base mask shifted left by n bits ( MASK << n ).
#define GSHIFT1MASK  ( GMASK  << 1 )
#define RBSHIFT1MASK ( RBMASK << 1 )
#define GSHIFT2MASK  ( GMASK  << 2 )
#define RBSHIFT2MASK ( RBMASK << 2 )
#define GSHIFT3MASK  ( GMASK  << 3 )
#define RBSHIFT3MASK ( RBMASK << 3 )
#define GSHIFT4MASK  ( GMASK  << 4 )
#define RBSHIFT4MASK ( RBMASK << 4 )
54
55
// Absolute value of a 32-bit two's complement quantity held in an unsigned
// int, e.g. the wrapped result of subtracting two unsigned values.
// Only a 32-bit version is needed because the packed YUV format has 32 bits.
// Clearing the sign bit alone is NOT an absolute value (it would turn -1
// into 0x7FFFFFFF), so negative inputs are negated instead.
// NOTE: the argument is evaluated more than once.
#define abs_32( value ) \
    ( ( (unsigned int)(value) & 0x80000000u ) \
      ? ( 0u - (unsigned int)(value) ) \
      : (unsigned int)(value) )


// Decide whether two packed YUV colours (0x00YYUUVV) differ noticeably.
//
// YUV1, YUV2: colours to compare.
// Returns true when the absolute difference of at least one component
// exceeds its threshold (Y: 0x30, U: 0x07, V: 0x06), false otherwise.
// The result does not depend on the order of the two arguments.
//
// static: gives the function internal linkage so the header can be included
// from several translation units (C or C++) without link conflicts.
static inline bool Diff( unsigned int YUV1, unsigned int YUV2 )
{
    // Identical colours: skip the per-component work.
    if( YUV1 == YUV2 ) return false;

    // Each component is compared in place; the unsigned subtraction may wrap,
    // abs_32() recovers the magnitude.
    return
        ( abs_32( ( YUV1 & 0x00FF0000 ) - ( YUV2 & 0x00FF0000 ) ) > 0x00300000 ) ||
        ( abs_32( ( YUV1 & 0x0000FF00 ) - ( YUV2 & 0x0000FF00 ) ) > 0x00000700 ) ||
        ( abs_32( ( YUV1 & 0x000000FF ) - ( YUV2 & 0x000000FF ) ) > 0x00000006 );
}
69
70
71
72 // ===============
73 // 32bit routines:
74 // ===============
75
// Store the 3:1 blend of two 32-bit pixels:  *pc = ( c1*3 + c2 ) / 4
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// Identical inputs are stored unchanged; a blended result has bits 24-31 clear.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp1_32( pc, c1, c2 ) \
( \
    *( (unsigned int *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( 3 * ( (c1) & 0x00FF00 ) + ( (c2) & 0x00FF00 ) ) & 0x0003FC00 ) \
            + ( ( 3 * ( (c1) & 0xFF00FF ) + ( (c2) & 0xFF00FF ) ) & 0x03FC03FC ) ) >> 2 ) \
)
94
95
// Store the 2:1:1 blend of three 32-bit pixels:  *pc = ( c1*2 + c2 + c3 ) / 4
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// A blended result has bits 24-31 clear.
// NOTE: the colour arguments are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp2_32( pc, c1, c2, c3 ) \
( \
    *( (unsigned int *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( \
            ( ( (c1) & 0x00FF00 ) * 2 ) + \
            ( (c2) & 0x00FF00 ) + \
            ( (c3) & 0x00FF00 ) \
        ) & 0x0003FC00 ) \
        + \
        ( ( \
            ( ( (c1) & 0xFF00FF ) * 2 ) + \
            ( (c2) & 0xFF00FF ) + \
            ( (c3) & 0xFF00FF ) \
        ) & 0x03FC03FC ) \
    ) >> 2 \
)
116
117
// Store the 7:1 blend of two 32-bit pixels:  *pc = ( c1*7 + c2 ) / 8
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// Identical inputs are stored unchanged; a blended result has bits 24-31 clear.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp3_32( pc, c1, c2 ) \
( \
    *( (unsigned int *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( 7 * ( (c1) & 0x00FF00 ) + ( (c2) & 0x00FF00 ) ) & 0x0007F800 ) \
            + ( ( 7 * ( (c1) & 0xFF00FF ) + ( (c2) & 0xFF00FF ) ) & 0x07F807F8 ) ) >> 3 ) \
)
136
137
// Store a 2:7:7 blend of three 32-bit pixels:  *pc = ( c1*2 + (c2+c3)*7 ) / 16
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// A blended result has bits 24-31 clear.
// NOTE: the colour arguments are evaluated more than once.
// Used by hq3x, not used by hq4x.
#define Interp4_32( pc, c1, c2, c3 ) \
( \
    *( (unsigned int *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( ( ( (c1) & 0x00FF00 ) * 2 ) + ( ( ( (c2) & 0x00FF00 ) + ( (c3) & 0x00FF00 ) ) * 7 ) ) & 0x000FF000 ) + \
        ( ( ( ( (c1) & 0xFF00FF ) * 2 ) + ( ( ( (c2) & 0xFF00FF ) + ( (c3) & 0xFF00FF ) ) * 7 ) ) & 0x0FF00FF0 ) \
    ) >> 4 \
)
149
150
// Store the average of two 32-bit pixels:  *pc = ( c1 + c2 ) / 2
// Green is summed on its own and red/blue together; the masks drop the
// fractional bit of every channel before the common right shift.
// Identical inputs are stored unchanged; a blended result has bits 24-31 clear.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp5_32( pc, c1, c2 ) \
( \
    *( (unsigned int *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( ( (c1) & 0x00FF00 ) + ( (c2) & 0x00FF00 ) ) & 0x0001FE00 ) \
            + ( ( ( (c1) & 0xFF00FF ) + ( (c2) & 0xFF00FF ) ) & 0x01FE01FE ) ) >> 1 ) \
)
169
170
// Store the 5:2:1 blend of three 32-bit pixels:  *pc = ( c1*5 + c2*2 + c3 ) / 8
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// A blended result has bits 24-31 clear.
// NOTE: the colour arguments are evaluated more than once.
// Used by hq4x.
#define Interp6_32( pc, c1, c2, c3 ) \
( \
    *( (unsigned int *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( \
            ( ( (c1) & 0x00FF00 ) * 5 ) + \
            ( ( (c2) & 0x00FF00 ) * 2 ) + \
            ( (c3) & 0x00FF00 ) \
        ) & 0x0007F800 ) \
        + \
        ( ( \
            ( ( (c1) & 0xFF00FF ) * 5 ) + \
            ( ( (c2) & 0xFF00FF ) * 2 ) + \
            ( (c3) & 0xFF00FF ) \
        ) & 0x07F807F8 ) \
    ) >> 3 \
)
191
192
// Store the 6:1:1 blend of three 32-bit pixels:  *pc = ( c1*6 + c2 + c3 ) / 8
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// A blended result has bits 24-31 clear.
// NOTE: the colour arguments are evaluated more than once.
// Used by hq4x.
#define Interp7_32( pc, c1, c2, c3 ) \
( \
    *( (unsigned int *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( \
            ( ( (c1) & 0x00FF00 ) * 6 ) + \
            ( (c2) & 0x00FF00 ) + \
            ( (c3) & 0x00FF00 ) \
        ) & 0x0007F800 ) \
        + \
        ( ( \
            ( ( (c1) & 0xFF00FF ) * 6 ) + \
            ( (c2) & 0xFF00FF ) + \
            ( (c3) & 0xFF00FF ) \
        ) & 0x07F807F8 ) \
    ) >> 3 \
)
213
214
// Store the 5:3 blend of two 32-bit pixels:  *pc = ( c1*5 + c2*3 ) / 8
// Green is summed on its own and red/blue together; the masks drop the
// fractional bits of every channel before the common right shift.
// Identical inputs are stored unchanged; a blended result has bits 24-31 clear.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq4x.
#define Interp8_32( pc, c1, c2 ) \
( \
    *( (unsigned int *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( 5 * ( (c1) & 0x00FF00 ) + 3 * ( (c2) & 0x00FF00 ) ) & 0x0007F800 ) \
            + ( ( 5 * ( (c1) & 0xFF00FF ) + 3 * ( (c2) & 0xFF00FF ) ) & 0x07F807F8 ) ) >> 3 ) \
)
233
234
// Convert a 32-bit RGB pixel to the packed YUV value compared by the hq
// filters.
//
// c: input colour; blue in bits 0-7, green in bits 8-15, red in bits 16-23.
//    Bits 24-31 are ignored.
// Returns 0x00YYUUVV with
//   Y = ( r + g + b ) / 4
//   U = 128 + ( r - b ) / 4
//   V = 128 + ( 2*g - r - b ) / 8
// where the divisions round towards minus infinity. Every component stays
// inside its own byte, so it can be extracted with a plain 0xFF mask.
//
// static: gives the function internal linkage so the header can be included
// from several translation units (C or C++) without link conflicts.
static inline unsigned int RGBtoYUV_32( unsigned int c )
{
    // Y divides the channel sum by 4 rather than 3: per the original author's
    // note, a division through 3 slows down the emulation about 10%.

    const int b = (int)( c & 0xFF );
    const int g = (int)( ( c >> 8 ) & 0xFF );
    const int r = (int)( ( c >> 16 ) & 0xFF );

    const unsigned int y = (unsigned int)( ( r + g + b ) >> 2 );
    // The +128 bias is folded in before shifting ( 512 / 4 and 1024 / 8 ) so
    // that the shifted operand is never negative.
    const unsigned int u = (unsigned int)( ( r - b + 512 ) >> 2 );
    const unsigned int v = (unsigned int)( ( 2 * g - r - b + 1024 ) >> 3 );

    return ( y << 16 ) | ( u << 8 ) | v;
}
259
260
261
262 // ===============
263 // 16bit routines:
264 // ===============
265
// Store the 3:1 blend of two 16-bit pixels:  *pc = ( c1*3 + c2 ) / 4
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT2MASK masks drop the fractional bits of every channel before the
// common right shift. Identical inputs are stored unchanged.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp1_16( pc, c1, c2 ) \
( \
    *( (unsigned short *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( 3 * ( (c1) & GMASK ) + ( (c2) & GMASK ) ) & GSHIFT2MASK ) \
            + ( ( 3 * ( (c1) & RBMASK ) + ( (c2) & RBMASK ) ) & RBSHIFT2MASK ) ) >> 2 ) \
)
284
285
// Store the 2:1:1 blend of three 16-bit pixels:  *pc = ( c1*2 + c2 + c3 ) / 4
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT2MASK masks drop the fractional bits of every channel before the
// common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// NOTE: the colour arguments are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp2_16( pc, c1, c2, c3 ) \
( \
    *( (unsigned short *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( \
            ( ( (c1) & GMASK ) * 2 ) + \
            ( (c2) & GMASK ) + \
            ( (c3) & GMASK ) \
        ) & GSHIFT2MASK ) \
        + \
        ( ( \
            ( ( (c1) & RBMASK ) * 2 ) + \
            ( (c2) & RBMASK ) + \
            ( (c3) & RBMASK ) \
        ) & RBSHIFT2MASK ) \
    ) >> 2 \
)
306
307
// Store the 7:1 blend of two 16-bit pixels:  *pc = ( c1*7 + c2 ) / 8
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT3MASK masks drop the fractional bits of every channel before the
// common right shift. Identical inputs are stored unchanged.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp3_16( pc, c1, c2 ) \
( \
    *( (unsigned short *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( 7 * ( (c1) & GMASK ) + ( (c2) & GMASK ) ) & GSHIFT3MASK ) \
            + ( ( 7 * ( (c1) & RBMASK ) + ( (c2) & RBMASK ) ) & RBSHIFT3MASK ) ) >> 3 ) \
)
326
327
// Store a 2:7:7 blend of three 16-bit pixels:  *pc = ( c1*2 + (c2+c3)*7 ) / 16
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT4MASK masks drop the fractional bits of every channel before the
// common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// NOTE: the colour arguments are evaluated more than once.
// Used by hq3x, not used by hq4x.
#define Interp4_16( pc, c1, c2, c3 ) \
( \
    *( (unsigned short *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( ( ( (c1) & GMASK ) * 2 ) + ( ( ( (c2) & GMASK ) + ( (c3) & GMASK ) ) * 7 ) ) & GSHIFT4MASK ) + \
        ( ( ( ( (c1) & RBMASK ) * 2 ) + ( ( ( (c2) & RBMASK ) + ( (c3) & RBMASK ) ) * 7 ) ) & RBSHIFT4MASK ) \
    ) >> 4 \
)
339
340
// Store the average of two 16-bit pixels:  *pc = ( c1 + c2 ) / 2
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT1MASK masks drop the fractional bit of every channel before the
// common right shift. Identical inputs are stored unchanged.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq3x and hq4x.
#define Interp5_16( pc, c1, c2 ) \
( \
    *( (unsigned short *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( ( (c1) & GMASK ) + ( (c2) & GMASK ) ) & GSHIFT1MASK ) \
            + ( ( ( (c1) & RBMASK ) + ( (c2) & RBMASK ) ) & RBSHIFT1MASK ) ) >> 1 ) \
)
359
360
// Store the 5:2:1 blend of three 16-bit pixels:  *pc = ( c1*5 + c2*2 + c3 ) / 8
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT3MASK masks drop the fractional bits of every channel before the
// common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// NOTE: the colour arguments are evaluated more than once.
// Used by hq4x.
#define Interp6_16( pc, c1, c2, c3 ) \
( \
    *( (unsigned short *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( \
            ( ( (c1) & GMASK ) * 5 ) + \
            ( ( (c2) & GMASK ) * 2 ) + \
            ( (c3) & GMASK ) \
        ) & GSHIFT3MASK ) \
        + \
        ( ( \
            ( ( (c1) & RBMASK ) * 5 ) + \
            ( ( (c2) & RBMASK ) * 2 ) + \
            ( (c3) & RBMASK ) \
        ) & RBSHIFT3MASK ) \
    ) >> 3 \
)
381
382
// Store the 6:1:1 blend of three 16-bit pixels:  *pc = ( c1*6 + c2 + c3 ) / 8
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT3MASK masks drop the fractional bits of every channel before the
// common right shift.
// The shortcut is taken only when ALL three inputs are identical; c1 is then
// stored unchanged. (Comparing "(c1 == c2) == c3" would test c3 against the
// 0/1 result of the first comparison and skip the blend when c3 is black.)
// NOTE: the colour arguments are evaluated more than once.
// Used by hq4x.
#define Interp7_16( pc, c1, c2, c3 ) \
( \
    *( (unsigned short *)(pc) ) = \
    ( ( (c1) == (c2) ) && ( (c1) == (c3) ) ) ? (c1) : \
    ( \
        ( ( \
            ( ( (c1) & GMASK ) * 6 ) + \
            ( (c2) & GMASK ) + \
            ( (c3) & GMASK ) \
        ) & GSHIFT3MASK ) \
        + \
        ( ( \
            ( ( (c1) & RBMASK ) * 6 ) + \
            ( (c2) & RBMASK ) + \
            ( (c3) & RBMASK ) \
        ) & RBSHIFT3MASK ) \
    ) >> 3 \
)
403
404
// Store the 5:3 blend of two 16-bit pixels:  *pc = ( c1*5 + c2*3 ) / 8
// Green (GMASK) is summed on its own and red/blue (RBMASK) together; the
// xSHIFT3MASK masks drop the fractional bits of every channel before the
// common right shift. Identical inputs are stored unchanged.
// NOTE: c1 and c2 are evaluated more than once.
// Used by hq4x.
#define Interp8_16( pc, c1, c2 ) \
( \
    *( (unsigned short *)(pc) ) = \
        ( (c1) == (c2) ) \
        ? (c1) \
        : ( ( ( ( 5 * ( (c1) & GMASK ) + 3 * ( (c2) & GMASK ) ) & GSHIFT3MASK ) \
            + ( ( 5 * ( (c1) & RBMASK ) + 3 * ( (c2) & RBMASK ) ) & RBSHIFT3MASK ) ) >> 3 ) \
)
423
424
// Convert a 16-bit RGB pixel to the packed YUV value compared by the hq
// filters.
//
// c: input colour, RGB555 when RGB555 is defined, RGB565 otherwise.
//    Each channel is first moved to the top of an 8-bit value.
// Returns 0x00YYUUVV with
//   Y = ( r + g + b ) / 4
//   U = 128 + ( r - b ) / 4
//   V = 128 + ( 2*g - r - b ) / 8
// where the divisions round towards minus infinity. Every component stays
// inside its own byte, so it can be extracted with a plain 0xFF mask.
//
// static: gives the function internal linkage so the header can be included
// from several translation units (C or C++) without link conflicts.
static inline unsigned int RGBtoYUV_16( unsigned short c )
{
    // Y divides the channel sum by 4 rather than 3: per the original author's
    // note, a division through 3 slows down the emulation about 10%.

    int r, g, b;
#ifdef RGB555
    r = ( c & 0x7C00 ) >> 7;
    g = ( c & 0x03E0 ) >> 2;
    b = ( c & 0x001F ) << 3;
#else
    r = ( c & 0xF800 ) >> 8;
    g = ( c & 0x07E0 ) >> 3;
    b = ( c & 0x001F ) << 3;
#endif

    const unsigned int y = (unsigned int)( ( r + g + b ) >> 2 );
    // The +128 bias is folded in before shifting ( 512 / 4 and 1024 / 8 ) so
    // that the shifted operand is never negative.
    const unsigned int u = (unsigned int)( ( r - b + 512 ) >> 2 );
    const unsigned int v = (unsigned int)( ( 2 * g - r - b + 1024 ) >> 3 );

    return ( y << 16 ) | ( u << 8 ) | v;
}
446