/*
    VisualBoyAdvance - a Game Boy & Game Boy Advance emulator

    Copyright (C) 2008 VBA-M development team


    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    hq filter by Maxim Stepin ( http://hiend3d.com )
*/
23 
// Channel masks for 16-bit pixels, used by the Interp*_16 macros.
//   GMASK  selects the green field, RBMASK selects red and blue together.
//   xSHIFTnMASK is the corresponding mask shifted left by n bits; it is
//   applied to a weighted sum whose weights add up to 2^n, just before the
//   sum is shifted right by n again.
#ifdef RGB555
// RGB555: 5 bits for green
#define GMASK 0x03E0
#define RBMASK 0x7C1F
// MASK << 1
#define GSHIFT1MASK 0x000007C0
#define RBSHIFT1MASK 0x0000F83E
// MASK << 2
#define GSHIFT2MASK 0x00000F80
#define RBSHIFT2MASK 0x0001F07C
// MASK << 3
#define GSHIFT3MASK 0x00001F00
#define RBSHIFT3MASK 0x0003E0F8
// MASK << 4
#define GSHIFT4MASK 0x00003E00
#define RBSHIFT4MASK 0x0007C1F0
#else
// RGB565: 6 bits for green
#define GMASK 0x07E0
#define RBMASK 0xF81F
// MASK << 1
#define GSHIFT1MASK 0x00000FC0
#define RBSHIFT1MASK 0x0001F03E
// MASK << 2
#define GSHIFT2MASK 0x00001F80
#define RBSHIFT2MASK 0x0003E07C
// MASK << 3
#define GSHIFT3MASK 0x00003F00
#define RBSHIFT3MASK 0x0007C0F8
// MASK << 4
#define GSHIFT4MASK 0x00007E00
#define RBSHIFT4MASK 0x000F81F0
#endif
54 
55 
// abs_32 clears bit 31 of a 32-bit value.
// NOTE: this is NOT a true absolute value for two's-complement numbers
// (e.g. 0xFFF00000, i.e. -0x00100000, becomes 0x7FF00000). The macro is kept
// with its original expansion in case other code references it; Diff() below
// does not rely on it.
#define abs_32( value )  ( ( value ) & 0x7FFFFFFF )


// Decide whether two packed 0x00YYUUVV values differ by more than the filter
// thresholds.
//   YUV1, YUV2: packed values; only bits 0-23 are inspected.
//   returns:    true if |dY| > 0x30, or |dU| > 0x07, or |dV| > 0x06;
//               false otherwise (including when both values are equal).
// The result is symmetric: Diff(a, b) == Diff(b, a).
// 'static' gives each translation unit its own copy, so the header also links
// when compiled as C, where a plain 'inline' definition is not an external one.
static inline bool Diff( unsigned int YUV1, unsigned int YUV2 )
{
	if( YUV1 == YUV2 ) return false; // Save some processing power

	// Extract each 8-bit component as a signed int so the subtraction cannot
	// wrap around when the first operand is the smaller one.
	const int dY = (int)( ( YUV1 >> 16 ) & 0xFF ) - (int)( ( YUV2 >> 16 ) & 0xFF );
	const int dU = (int)( ( YUV1 >>  8 ) & 0xFF ) - (int)( ( YUV2 >>  8 ) & 0xFF );
	const int dV = (int)(   YUV1         & 0xFF ) - (int)(   YUV2         & 0xFF );

	return
		( ( dY < 0 ? -dY : dY ) > 0x30 ) ||
		( ( dU < 0 ? -dU : dU ) > 0x07 ) ||
		( ( dV < 0 ? -dV : dV ) > 0x06 );
}
69 
70 
71 
// ===============
// 32bit routines:
// ===============
75 
// Interp1_32: *pc = ( c1*3 + c2 ) / 4, per 8-bit channel.   (hq3x, hq4x)
//   pc:     address of the destination pixel, written as unsigned int.
//   c1, c2: 32-bit colours; channels are taken from bits 0-23.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green (0x00FF00) and
// red+blue (0xFF00FF) are weighted separately so the channel sums cannot run
// into each other, and bits 24-31 of the stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp1_32( pc, c1, c2 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & 0x00FF00 ) * 3 ) + \
			( (c2) & 0x00FF00 ) \
		) & 0x0003FC00 ) \
		+ \
		( ( \
			( ( (c1) & 0xFF00FF ) * 3 ) + \
			( (c2) & 0xFF00FF ) \
		) & 0x03FC03FC ) \
	) >> 2 \
)
94 
95 
// Interp2_32: *pc = ( c1*2 + c2 + c3 ) / 4, per 8-bit channel.   (hq3x, hq4x)
//   pc:         address of the destination pixel, written as unsigned int.
//   c1, c2, c3: 32-bit colours; channels are taken from bits 0-23.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately and bits 24-31 of the
// stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp2_32( pc, c1, c2, c3 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & 0x00FF00 ) * 2 ) + \
			( (c2) & 0x00FF00 ) + \
			( (c3) & 0x00FF00 ) \
		) & 0x0003FC00 ) \
		+ \
		( ( \
			( ( (c1) & 0xFF00FF ) * 2 ) + \
			( (c2) & 0xFF00FF ) + \
			( (c3) & 0xFF00FF ) \
		) & 0x03FC03FC ) \
	) >> 2 \
)
116 
117 
// Interp3_32: *pc = ( c1*7 + c2 ) / 8, per 8-bit channel.   (hq3x, hq4x)
//   pc:     address of the destination pixel, written as unsigned int.
//   c1, c2: 32-bit colours; channels are taken from bits 0-23.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green and red+blue
// are weighted separately and bits 24-31 of the stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp3_32( pc, c1, c2 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & 0x00FF00 ) * 7 ) + \
			( (c2) & 0x00FF00 ) \
		) & 0x0007F800 ) \
		+ \
		( ( \
			( ( (c1) & 0xFF00FF ) * 7 ) + \
			( (c2) & 0xFF00FF ) \
		) & 0x07F807F8 ) \
	) >> 3 \
)
136 
137 
// Interp4_32: *pc = ( c1*2 + (c2+c3)*7 ) / 16, per 8-bit channel.
// (hq3x, not used by hq4x)
//   pc:         address of the destination pixel, written as unsigned int.
//   c1, c2, c3: 32-bit colours; channels are taken from bits 0-23.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately and bits 24-31 of the
// stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp4_32( pc, c1, c2, c3 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
	( ( ( ( (c1) & 0x00FF00 ) * 2 ) + ( ( ( (c2) & 0x00FF00 ) + ( (c3) & 0x00FF00 ) ) * 7 ) ) & 0x000FF000 ) + \
	( ( ( ( (c1) & 0xFF00FF ) * 2 ) + ( ( ( (c2) & 0xFF00FF ) + ( (c3) & 0xFF00FF ) ) * 7 ) ) & 0x0FF00FF0 ) \
	) >> 4 \
)
149 
150 
// Interp5_32: *pc = ( c1 + c2 ) / 2, per 8-bit channel.   (hq3x, hq4x)
//   pc:     address of the destination pixel, written as unsigned int.
//   c1, c2: 32-bit colours; channels are taken from bits 0-23.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green and red+blue
// are summed separately and bits 24-31 of the stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp5_32( pc, c1, c2 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( (c1) & 0x00FF00 ) + \
			( (c2) & 0x00FF00 ) \
		) & 0x0001FE00 ) \
		+ \
		( ( \
			( (c1) & 0xFF00FF ) + \
			( (c2) & 0xFF00FF ) \
		) & 0x01FE01FE ) \
	) >> 1 \
)
169 
170 
// Interp6_32: *pc = ( c1*5 + c2*2 + c3 ) / 8, per 8-bit channel.   (hq4x)
//   pc:         address of the destination pixel, written as unsigned int.
//   c1, c2, c3: 32-bit colours; channels are taken from bits 0-23.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately and bits 24-31 of the
// stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp6_32( pc, c1, c2, c3 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & 0x00FF00 ) * 5 ) + \
			( ( (c2) & 0x00FF00 ) * 2 ) + \
			( (c3) & 0x00FF00 ) \
		) & 0x0007F800 ) \
		+ \
		( ( \
			( ( (c1) & 0xFF00FF ) * 5 ) + \
			( ( (c2) & 0xFF00FF ) * 2 ) + \
			( (c3) & 0xFF00FF ) \
		) & 0x07F807F8 ) \
	) >> 3 \
)
191 
192 
// Interp7_32: *pc = ( c1*6 + c2 + c3 ) / 8, per 8-bit channel.   (hq4x)
//   pc:         address of the destination pixel, written as unsigned int.
//   c1, c2, c3: 32-bit colours; channels are taken from bits 0-23.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately and bits 24-31 of the
// stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp7_32( pc, c1, c2, c3 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & 0x00FF00 ) * 6 ) + \
			( (c2) & 0x00FF00 ) + \
			( (c3) & 0x00FF00 ) \
		) & 0x0007F800 ) \
		+ \
		( ( \
			( ( (c1) & 0xFF00FF ) * 6 ) + \
			( (c2) & 0xFF00FF ) + \
			( (c3) & 0xFF00FF ) \
		) & 0x07F807F8 ) \
	) >> 3 \
)
213 
214 
// Interp8_32: *pc = ( c1*5 + c2*3 ) / 8, per 8-bit channel.   (hq4x)
//   pc:     address of the destination pixel, written as unsigned int.
//   c1, c2: 32-bit colours; channels are taken from bits 0-23.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green and red+blue
// are weighted separately and bits 24-31 of the stored value are zero.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp8_32( pc, c1, c2 ) \
( \
	*( (unsigned int *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & 0x00FF00 ) * 5 ) + \
			( ( (c2) & 0x00FF00 ) * 3 ) \
		) & 0x0007F800 ) \
		+ \
		( ( \
			( ( (c1) & 0xFF00FF ) * 5 ) + \
			( ( (c2) & 0xFF00FF ) * 3 ) \
		) & 0x07F807F8 ) \
	) >> 3 \
)
233 
234 
// Convert a 32-bit colour (red in bits 16-23, green in bits 8-15, blue in
// bits 0-7) into a packed value laid out roughly as 0x00YYUUVV.
//   c:       input colour; bits 24-31 are ignored.
//   returns: ( (r+g+b) << 14 ) + ( (r-b+512) << 4 ) + ( (2g-r-b) >> 3 ) + 128
// The three terms are simply added, so they are not strictly confined to
// separate bytes (for instance the two low bits of r+g+b land in bits 14-15).
// Y is built from r+g+b with a shift rather than a division by 3: according to
// the original author, dividing by 3 slowed the emulation down by about 10%.
// (2g-r-b) can be negative; the code relies on '>>' of a negative int being an
// arithmetic shift, which is implementation-defined in C.
//
// Unoptimized reference formula kept from the original source:
//   Y = (r + g + b) >> 2;
//   u = 128 + ((r - b) >> 2);
//   v = 128 + ((-r + 2*g - b) >> 3);
//   return (Y << 16) + (u << 8) + v;
//
// 'static' gives each translation unit its own copy, so the header also links
// when compiled as C, where a plain 'inline' definition is not an external one.
static inline unsigned int RGBtoYUV_32( unsigned int c )
{
	// Each channel is reduced to 8 bits and then used in plain int arithmetic.
	const int r = (int)( ( c >> 16 ) & 0xFF );
	const int g = (int)( ( c >>  8 ) & 0xFF );
	const int b = (int)(   c         & 0xFF );

	return ( (r + g + b) << 14 ) +
		( ( r - b + 512 ) << 4 ) +
		( ( 2*g - r - b ) >> 3 ) + 128;
}
259 
260 
261 
// ===============
// 16bit routines:
// ===============
265 
// Interp1_16: *pc = ( c1*3 + c2 ) / 4, per colour channel.   (hq3x, hq4x)
//   pc:     address of the destination pixel, written as unsigned short.
//   c1, c2: 16-bit colours in the layout described by GMASK / RBMASK.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green (GMASK) and
// red+blue (RBMASK) are weighted separately, masked with the matching
// *SHIFT2MASK and shifted back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp1_16( pc, c1, c2 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & GMASK ) * 3 ) + \
			( (c2) & GMASK ) \
		) & GSHIFT2MASK ) \
		+ \
		( ( \
			( ( (c1) & RBMASK ) * 3 ) + \
			( (c2) & RBMASK ) \
		) & RBSHIFT2MASK ) \
	) >> 2 \
)
284 
285 
// Interp2_16: *pc = ( c1*2 + c2 + c3 ) / 4, per colour channel.   (hq3x, hq4x)
//   pc:         address of the destination pixel, written as unsigned short.
//   c1, c2, c3: 16-bit colours in the layout described by GMASK / RBMASK.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately, masked with the
// matching *SHIFT2MASK and shifted back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp2_16( pc, c1, c2, c3 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & GMASK ) * 2 ) + \
			( (c2) & GMASK ) + \
			( (c3) & GMASK ) \
		) & GSHIFT2MASK ) \
		+ \
		( ( \
			( ( (c1) & RBMASK ) * 2 ) + \
			( (c2) & RBMASK ) + \
			( (c3) & RBMASK ) \
		) & RBSHIFT2MASK ) \
	) >> 2 \
)
306 
307 
// Interp3_16: *pc = ( c1*7 + c2 ) / 8, per colour channel.   (hq3x, hq4x)
//   pc:     address of the destination pixel, written as unsigned short.
//   c1, c2: 16-bit colours in the layout described by GMASK / RBMASK.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green and red+blue
// are weighted separately, masked with the matching *SHIFT3MASK and shifted
// back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp3_16( pc, c1, c2 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & GMASK ) * 7 ) + \
			( (c2) & GMASK ) \
		) & GSHIFT3MASK ) \
		+ \
		( ( \
			( ( (c1) & RBMASK ) * 7 ) + \
			( (c2) & RBMASK ) \
		) & RBSHIFT3MASK ) \
	) >> 3 \
)
326 
327 
// Interp4_16: *pc = ( c1*2 + (c2+c3)*7 ) / 16, per colour channel.
// (hq3x, not used by hq4x)
//   pc:         address of the destination pixel, written as unsigned short.
//   c1, c2, c3: 16-bit colours in the layout described by GMASK / RBMASK.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately, masked with the
// matching *SHIFT4MASK and shifted back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp4_16( pc, c1, c2, c3 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
	( ( ( ( (c1) & GMASK ) * 2 ) + ( ( ( (c2) & GMASK ) + ( (c3) & GMASK ) ) * 7 ) ) & GSHIFT4MASK ) + \
	( ( ( ( (c1) & RBMASK ) * 2 ) + ( ( ( (c2) & RBMASK ) + ( (c3) & RBMASK ) ) * 7 ) ) & RBSHIFT4MASK ) \
	) >> 4 \
)
339 
340 
// Interp5_16: *pc = ( c1 + c2 ) / 2, per colour channel.   (hq3x, hq4x)
//   pc:     address of the destination pixel, written as unsigned short.
//   c1, c2: 16-bit colours in the layout described by GMASK / RBMASK.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green and red+blue
// are summed separately, masked with the matching *SHIFT1MASK and shifted
// back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp5_16( pc, c1, c2 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( (c1) & GMASK ) + \
			( (c2) & GMASK ) \
		) & GSHIFT1MASK ) \
		+ \
		( ( \
			( (c1) & RBMASK ) + \
			( (c2) & RBMASK ) \
		) & RBSHIFT1MASK ) \
	) >> 1 \
)
359 
360 
// Interp6_16: *pc = ( c1*5 + c2*2 + c3 ) / 8, per colour channel.   (hq4x)
//   pc:         address of the destination pixel, written as unsigned short.
//   c1, c2, c3: 16-bit colours in the layout described by GMASK / RBMASK.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately, masked with the
// matching *SHIFT3MASK and shifted back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp6_16( pc, c1, c2, c3 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & GMASK ) * 5 ) + \
			( ( (c2) & GMASK ) * 2 ) + \
			( (c3) & GMASK ) \
		) & GSHIFT3MASK ) \
		+ \
		( ( \
			( ( (c1) & RBMASK ) * 5 ) + \
			( ( (c2) & RBMASK ) * 2 ) + \
			( (c3) & RBMASK ) \
		) & RBSHIFT3MASK ) \
	) >> 3 \
)
381 
382 
// Interp7_16: *pc = ( c1*6 + c2 + c3 ) / 8, per colour channel.   (hq4x)
//   pc:         address of the destination pixel, written as unsigned short.
//   c1, c2, c3: 16-bit colours in the layout described by GMASK / RBMASK.
// If all three colours are equal, c1 is stored unchanged. The test has to be
// written with && : chaining it as (c1 == c2) == c3 would compare the 0/1
// result of the first comparison against c3.
// Otherwise green and red+blue are weighted separately, masked with the
// matching *SHIFT3MASK and shifted back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp7_16( pc, c1, c2, c3 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) && (c2) == (c3) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & GMASK ) * 6 ) + \
			( (c2) & GMASK ) + \
			( (c3) & GMASK ) \
		) & GSHIFT3MASK ) \
		+ \
		( ( \
			( ( (c1) & RBMASK ) * 6 ) + \
			( (c2) & RBMASK ) + \
			( (c3) & RBMASK ) \
		) & RBSHIFT3MASK ) \
	) >> 3 \
)
403 
404 
// Interp8_16: *pc = ( c1*5 + c2*3 ) / 8, per colour channel.   (hq4x)
//   pc:     address of the destination pixel, written as unsigned short.
//   c1, c2: 16-bit colours in the layout described by GMASK / RBMASK.
// If c1 == c2 the value c1 is stored unchanged. Otherwise green and red+blue
// are weighted separately, masked with the matching *SHIFT3MASK and shifted
// back down.
// The arguments are evaluated more than once; pass side-effect-free expressions.
#define Interp8_16( pc, c1, c2 ) \
( \
	*( (unsigned short *)(pc) ) = \
	( (c1) == (c2) ) ? (c1) : \
	( \
		( ( \
			( ( (c1) & GMASK ) * 5 ) + \
			( ( (c2) & GMASK ) * 3 ) \
		) & GSHIFT3MASK ) \
		+ \
		( ( \
			( ( (c1) & RBMASK ) * 5 ) + \
			( ( (c2) & RBMASK ) * 3 ) \
		) & RBSHIFT3MASK ) \
	) >> 3 \
)
423 
424 
// Convert a 16-bit colour into a packed value laid out roughly as 0x00YYUUVV.
//   c:       input colour, RGB555 when RGB555 is defined, RGB565 otherwise.
//   returns: ( (r+g+b) << 14 ) + ( (r-b+512) << 4 ) + ( (2g-r-b) >> 3 ) + 128
// Each channel is first moved into the top bits of an 8-bit value (the low
// bits stay zero). The three terms of the result are simply added, so they are
// not strictly confined to separate bytes.
// Y is built from r+g+b with a shift rather than a division by 3: according to
// the original author, dividing by 3 slowed the emulation down by about 10%.
// (2g-r-b) can be negative; the code relies on '>>' of a negative int being an
// arithmetic shift, which is implementation-defined in C.
// 'static' gives each translation unit its own copy, so the header also links
// when compiled as C, where a plain 'inline' definition is not an external one.
static inline unsigned int RGBtoYUV_16( unsigned short c )
{
#ifdef RGB555
	const int r = ( c & 0x7C00 ) >> 7;
	const int g = ( c & 0x03E0 ) >> 2;
	const int b = ( c & 0x001F ) << 3;
#else
	const int r = ( c & 0xF800 ) >> 8;
	const int g = ( c & 0x07E0 ) >> 3;
	const int b = ( c & 0x001F ) << 3;
#endif

	return ( (r + g + b) << 14 ) +
		( ( r - b + 512 ) << 4 ) +
		( ( 2*g - r - b ) >> 3 ) + 128;
}
446