1 // Copyright (C) 2002-2012 Nikolaus Gebhardt / Thomas Alten
2 // This file is part of the "Irrlicht Engine".
3 // For conditions of distribution and use, see copyright notice in irrlicht.h
4 
5 /*
6 	History:
7 	- changed behavior for log2 textures ( replaced multiplies by shift )
8 */
9 
10 #ifndef __S_VIDEO_2_SOFTWARE_HELPER_H_INCLUDED__
11 #define __S_VIDEO_2_SOFTWARE_HELPER_H_INCLUDED__
12 
13 #include "SoftwareDriver2_compile_config.h"
14 #include "irrMath.h"
15 #include "SMaterial.h"
16 
17 
18 
19 namespace irr
20 {
21 
// supporting different packed pixel needs many defines...
// Exactly one of the two branches below is compiled, selected by
// SOFTWARE_DRIVER_2_32BIT. Both define the same set of names.

#ifdef SOFTWARE_DRIVER_2_32BIT
	// 32 bit packed sample, 8 bits per channel, alpha in the top byte
	typedef u32	tVideoSample;

	// bit masks selecting a single channel of a packed sample
	#define	MASK_A	0xFF000000
	#define	MASK_R	0x00FF0000
	#define	MASK_G	0x0000FF00
	#define	MASK_B	0x000000FF

	// position of the lowest bit of each channel
	#define	SHIFT_A	24
	#define	SHIFT_R	16
	#define	SHIFT_G	8
	#define	SHIFT_B	0

	// largest value of one color channel and its width in bits
	#define	COLOR_MAX					0xFF
	#define	COLOR_MAX_LOG2				8
	// packed sample with all bits set
	#define	COLOR_BRIGHT_WHITE			0xFFFFFFFF

	// log2 of the sample size in bytes (4 bytes); used to turn a texel
	// index into a byte offset
	#define VIDEO_SAMPLE_GRANULARITY	2

#else
	// 16 bit packed sample, 1 bit alpha and 5 bits per color channel
	typedef u16	tVideoSample;

	// bit masks selecting a single channel of a packed sample
	#define	MASK_A	0x8000
	#define	MASK_R	0x7C00
	#define	MASK_G	0x03E0
	#define	MASK_B	0x001F

	// position of the lowest bit of each channel
	#define	SHIFT_A	15
	#define	SHIFT_R	10
	#define	SHIFT_G	5
	#define	SHIFT_B	0

	// largest value of one color channel and its width in bits
	#define	COLOR_MAX					0x1F
	#define	COLOR_MAX_LOG2				5
	// packed sample with all bits set
	#define	COLOR_BRIGHT_WHITE			0xFFFF
	// log2 of the sample size in bytes (2 bytes)
	#define VIDEO_SAMPLE_GRANULARITY	1

#endif
62 
63 
64 
65 
66 // ----------------------- Generic ----------------------------------
67 
68 //! a more useful memset for pixel
69 // (standard memset only works with 8-bit values)
memset32(void * dest,const u32 value,u32 bytesize)70 inline void memset32(void * dest, const u32 value, u32 bytesize)
71 {
72 	u32 * d = (u32*) dest;
73 
74 	u32 i;
75 
76 	// loops unrolled to reduce the number of increments by factor ~8.
77 	i = bytesize >> (2 + 3);
78 	while (i)
79 	{
80 		d[0] = value;
81 		d[1] = value;
82 		d[2] = value;
83 		d[3] = value;
84 
85 		d[4] = value;
86 		d[5] = value;
87 		d[6] = value;
88 		d[7] = value;
89 
90 		d += 8;
91 		i -= 1;
92 	}
93 
94 	i = (bytesize >> 2 ) & 7;
95 	while (i)
96 	{
97 		d[0] = value;
98 		d += 1;
99 		i -= 1;
100 	}
101 }
102 
103 //! a more useful memset for pixel
104 // (standard memset only works with 8-bit values)
memset16(void * dest,const u16 value,u32 bytesize)105 inline void memset16(void * dest, const u16 value, u32 bytesize)
106 {
107 	u16 * d = (u16*) dest;
108 
109 	u32 i;
110 
111 	// loops unrolled to reduce the number of increments by factor ~8.
112 	i = bytesize >> (1 + 3);
113 	while (i)
114 	{
115 		d[0] = value;
116 		d[1] = value;
117 		d[2] = value;
118 		d[3] = value;
119 
120 		d[4] = value;
121 		d[5] = value;
122 		d[6] = value;
123 		d[7] = value;
124 
125 		d += 8;
126 		--i;
127 	}
128 
129 	i = (bytesize >> 1 ) & 7;
130 	while (i)
131 	{
132 		d[0] = value;
133 		++d;
134 		--i;
135 	}
136 }
137 
138 /*
139 	use biased loop counter
140 	--> 0 byte copy is forbidden
141 */
memcpy32_small(void * dest,const void * source,u32 bytesize)142 REALINLINE void memcpy32_small ( void * dest, const void *source, u32 bytesize )
143 {
144 	u32 c = bytesize >> 2;
145 
146 	do
147 	{
148 		((u32*) dest ) [ c-1 ] = ((u32*) source) [ c-1 ];
149 	} while ( --c );
150 
151 }
152 
153 
154 
155 // integer log2 of a float ieee 754. TODO: non ieee floating point
s32_log2_f32(f32 f)156 static inline s32 s32_log2_f32( f32 f)
157 {
158 	u32 x = IR ( f );
159 	return ((x & 0x7F800000) >> 23) - 127;
160 }
161 
s32_log2_s32(u32 x)162 static inline s32 s32_log2_s32(u32 x)
163 {
164 	return s32_log2_f32( (f32) x);
165 }
166 
s32_abs(s32 x)167 static inline s32 s32_abs(s32 x)
168 {
169 	s32 b = x >> 31;
170 	return (x ^ b ) - b;
171 }
172 
173 
174 //! conditional set based on mask and arithmetic shift
if_mask_a_else_b(const u32 mask,const u32 a,const u32 b)175 REALINLINE u32 if_mask_a_else_b ( const u32 mask, const u32 a, const u32 b )
176 {
177 	return ( mask & ( a ^ b ) ) ^ b;
178 }
179 
180 // ------------------ Video---------------------------------------
181 /*!
182 	Pixel = dest * ( 1 - alpha ) + source * alpha
183 	alpha [0;256]
184 */
PixelBlend32(const u32 c2,const u32 c1,u32 alpha)185 REALINLINE u32 PixelBlend32 ( const u32 c2, const u32 c1, u32 alpha )
186 {
187 	u32 srcRB = c1 & 0x00FF00FF;
188 	u32 srcXG = c1 & 0x0000FF00;
189 
190 	u32 dstRB = c2 & 0x00FF00FF;
191 	u32 dstXG = c2 & 0x0000FF00;
192 
193 
194 	u32 rb = srcRB - dstRB;
195 	u32 xg = srcXG - dstXG;
196 
197 	rb *= alpha;
198 	xg *= alpha;
199 	rb >>= 8;
200 	xg >>= 8;
201 
202 	rb += dstRB;
203 	xg += dstXG;
204 
205 	rb &= 0x00FF00FF;
206 	xg &= 0x0000FF00;
207 
208 	return rb | xg;
209 }
210 
211 /*!
212 	Pixel = dest * ( 1 - alpha ) + source * alpha
213 	alpha [0;32]
214 */
PixelBlend16(const u16 c2,const u32 c1,const u16 alpha)215 inline u16 PixelBlend16 ( const u16 c2, const u32 c1, const u16 alpha )
216 {
217 	const u16 srcRB = c1 & 0x7C1F;
218 	const u16 srcXG = c1 & 0x03E0;
219 
220 	const u16 dstRB = c2 & 0x7C1F;
221 	const u16 dstXG = c2 & 0x03E0;
222 
223 	u32 rb = srcRB - dstRB;
224 	u32 xg = srcXG - dstXG;
225 
226 	rb *= alpha;
227 	xg *= alpha;
228 	rb >>= 5;
229 	xg >>= 5;
230 
231 	rb += dstRB;
232 	xg += dstXG;
233 
234 	rb &= 0x7C1F;
235 	xg &= 0x03E0;
236 
237 	return (u16)(rb | xg);
238 }
239 
240 /*
241 	Pixel = c0 * (c1/31). c0 Alpha retain
242 */
PixelMul16(const u16 c0,const u16 c1)243 inline u16 PixelMul16 ( const u16 c0, const u16 c1)
244 {
245 	return (u16)((( ( (c0 & 0x7C00) * (c1 & 0x7C00) ) & 0x3E000000 ) >> 15 ) |
246 			(( ( (c0 & 0x03E0) * (c1 & 0x03E0) ) & 0x000F8000 ) >> 10 ) |
247 			(( ( (c0 & 0x001F) * (c1 & 0x001F) ) & 0x000003E0 ) >> 5 ) |
248 			(c0 & 0x8000));
249 }
250 
251 /*
252 	Pixel = c0 * (c1/31).
253 */
PixelMul16_2(u16 c0,u16 c1)254 inline u16 PixelMul16_2 ( u16 c0, u16 c1)
255 {
256 	return	(u16)(( ( (c0 & 0x7C00) * (c1 & 0x7C00) ) & 0x3E000000 ) >> 15 |
257 			( ( (c0 & 0x03E0) * (c1 & 0x03E0) ) & 0x000F8000 ) >> 10 |
258 			( ( (c0 & 0x001F) * (c1 & 0x001F) ) & 0x000003E0 ) >> 5  |
259 			( c0 & c1 & 0x8000));
260 }
261 
262 /*
263 	Pixel = c0 * (c1/255). c0 Alpha Retain
264 */
PixelMul32(const u32 c0,const u32 c1)265 REALINLINE u32 PixelMul32 ( const u32 c0, const u32 c1)
266 {
267 	return	(c0 & 0xFF000000) |
268 			(( ( (c0 & 0x00FF0000) >> 12 ) * ( (c1 & 0x00FF0000) >> 12 ) ) & 0x00FF0000 ) |
269 			(( ( (c0 & 0x0000FF00) * (c1 & 0x0000FF00) ) >> 16 ) & 0x0000FF00 ) |
270 			(( ( (c0 & 0x000000FF) * (c1 & 0x000000FF) ) >> 8  ) & 0x000000FF);
271 }
272 
273 /*
274 	Pixel = c0 * (c1/255).
275 */
PixelMul32_2(const u32 c0,const u32 c1)276 REALINLINE u32 PixelMul32_2 ( const u32 c0, const u32 c1)
277 {
278 	return	(( ( (c0 & 0xFF000000) >> 16 ) * ( (c1 & 0xFF000000) >> 16 ) ) & 0xFF000000 ) |
279 			(( ( (c0 & 0x00FF0000) >> 12 ) * ( (c1 & 0x00FF0000) >> 12 ) ) & 0x00FF0000 ) |
280 			(( ( (c0 & 0x0000FF00) * (c1 & 0x0000FF00) ) >> 16 ) & 0x0000FF00 ) |
281 			(( ( (c0 & 0x000000FF) * (c1 & 0x000000FF) ) >> 8  ) & 0x000000FF);
282 }
283 
284 /*
285 	Pixel = clamp ( c0 + c1, 0, 255 )
286 */
PixelAdd32(const u32 c2,const u32 c1)287 REALINLINE u32 PixelAdd32 ( const u32 c2, const u32 c1)
288 {
289 	u32 sum = ( c2 & 0x00FFFFFF )  + ( c1 & 0x00FFFFFF );
290 	u32 low_bits = ( c2 ^ c1 ) & 0x00010101;
291 	s32 carries  = ( sum - low_bits ) & 0x01010100;
292 	u32 modulo = sum - carries;
293 	u32 clamp = carries - ( carries >> 8 );
294 	return modulo | clamp;
295 }
296 
297 #if 0
298 
299 // 1 - Bit Alpha Blending
300 inline u16 PixelBlend16 ( const u16 destination, const u16 source )
301 {
302    if((source & 0x8000) == 0x8000)
303       return source; // The source is visible, so use it.
304    else
305       return destination; // The source is transparent, so use the destination.
306 }
307 
308 // 1 - Bit Alpha Blending 16Bit SIMD
309 inline u32 PixelBlend16_simd ( const u32 destination, const u32 source )
310 {
311 	switch(source & 0x80008000)
312 	{
313 		case 0x80008000: // Both source pixels are visible
314 			return source;
315 
316 		case 0x80000000: // Only the first source pixel is visible
317 			return (source & 0xFFFF0000) | (destination & 0x0000FFFF);
318 
319 		case 0x00008000: // Only the second source pixel is visible.
320 			return (destination & 0xFFFF0000) | (source & 0x0000FFFF);
321 
322 		default: // Neither source pixel is visible.
323 			return destination;
324 	}
325 }
326 #else
327 
328 // 1 - Bit Alpha Blending
PixelBlend16(const u16 c2,const u16 c1)329 inline u16 PixelBlend16 ( const u16 c2, const u16 c1 )
330 {
331 	u16 mask = ((c1 & 0x8000) >> 15 ) + 0x7fff;
332 	return (c2 & mask ) | ( c1 & ~mask );
333 }
334 
335 // 1 - Bit Alpha Blending 16Bit SIMD
PixelBlend16_simd(const u32 c2,const u32 c1)336 inline u32 PixelBlend16_simd ( const u32 c2, const u32 c1 )
337 {
338 	u32 mask = ((c1 & 0x80008000) >> 15 ) + 0x7fff7fff;
339 	return (c2 & mask ) | ( c1 & ~mask );
340 }
341 
342 #endif
343 
344 /*!
345 	Pixel = dest * ( 1 - SourceAlpha ) + source * SourceAlpha
346 */
PixelBlend32(const u32 c2,const u32 c1)347 inline u32 PixelBlend32 ( const u32 c2, const u32 c1 )
348 {
349 	// alpha test
350 	u32 alpha = c1 & 0xFF000000;
351 
352 	if ( 0 == alpha )
353 		return c2;
354 
355 	if ( 0xFF000000 == alpha )
356 	{
357 		return c1;
358 	}
359 
360 	alpha >>= 24;
361 
362 	// add highbit alpha, if ( alpha > 127 ) alpha += 1;
363 	alpha += ( alpha >> 7);
364 
365 	u32 srcRB = c1 & 0x00FF00FF;
366 	u32 srcXG = c1 & 0x0000FF00;
367 
368 	u32 dstRB = c2 & 0x00FF00FF;
369 	u32 dstXG = c2 & 0x0000FF00;
370 
371 
372 	u32 rb = srcRB - dstRB;
373 	u32 xg = srcXG - dstXG;
374 
375 	rb *= alpha;
376 	xg *= alpha;
377 	rb >>= 8;
378 	xg >>= 8;
379 
380 	rb += dstRB;
381 	xg += dstXG;
382 
383 	rb &= 0x00FF00FF;
384 	xg &= 0x0000FF00;
385 
386 	return (c1 & 0xFF000000) | rb | xg;
387 }
388 
389 
390 
// ------------------ Fix Point ----------------------------------

// signed fixed-point value; FIX_POINT_PRE of its bits are the fraction
typedef s32 tFixPoint;
// unsigned fixed-point value; FIX_POINT_PRE of its bits are the fraction
typedef u32 tFixPointu;
395 
// Fixed-point precision selection. Exactly one of the variants below has to
// be enabled with "#if 1". Within a variant every constant follows from
// FIX_POINT_PRE, the number of fraction bits:
//   FIX_POINT_FRACT_MASK     ( 1 << FIX_POINT_PRE ) - 1
//   FIX_POINT_SIGNED_MASK    all 32 bits except the fraction bits
//   FIX_POINT_UNSIGNED_MASK  FIX_POINT_SIGNED_MASK without the sign bit
//   FIX_POINT_ONE            1.0 in fixed point, 1 << FIX_POINT_PRE
//   FIX_POINT_ZERO_DOT_FIVE  0.5 in fixed point
//   FIX_POINT_F32_MUL        factor converting a float to fixed point

// Fix Point 12
#if 0
	#define FIX_POINT_PRE			12
	#define FIX_POINT_FRACT_MASK	0xFFF
	#define FIX_POINT_SIGNED_MASK	0xFFFFF000
	#define FIX_POINT_UNSIGNED_MASK	0x7FFFF000
	#define FIX_POINT_ONE			0x1000
	#define FIX_POINT_ZERO_DOT_FIVE	0x0800
	#define FIX_POINT_F32_MUL		4096.f
#endif

// Fix Point 10
#if 1
	#define FIX_POINT_PRE			10
	#define FIX_POINT_FRACT_MASK	0x3FF
	#define FIX_POINT_SIGNED_MASK	0xFFFFFC00
	// was 0x7FFFFE00 (the value of the 9 bit variant), which wrongly
	// included the topmost fraction bit
	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFC00
	#define FIX_POINT_ONE			0x400
	#define FIX_POINT_ZERO_DOT_FIVE	0x200
	#define FIX_POINT_F32_MUL		1024.f
#endif

// Fix Point 9
#if 0
	#define FIX_POINT_PRE			9
	#define FIX_POINT_FRACT_MASK	0x1FF
	#define FIX_POINT_SIGNED_MASK	0xFFFFFE00
	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFE00
	#define FIX_POINT_ONE			0x200
	#define FIX_POINT_ZERO_DOT_FIVE	0x100
	#define FIX_POINT_F32_MUL		512.f
#endif

// Fix Point 7
#if 0
	#define FIX_POINT_PRE			7
	#define FIX_POINT_FRACT_MASK	0x7F
	#define FIX_POINT_SIGNED_MASK	0xFFFFFF80
	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFF80
	#define FIX_POINT_ONE			0x80
	#define FIX_POINT_ZERO_DOT_FIVE	0x40
	#define FIX_POINT_F32_MUL		128.f
#endif

// largest color channel value in fixed point
#define	FIXPOINT_COLOR_MAX		( COLOR_MAX << FIX_POINT_PRE )
// half of the largest color channel value in fixed point
#define FIX_POINT_HALF_COLOR ( (tFixPoint) ( ((f32) COLOR_MAX / 2.f * FIX_POINT_F32_MUL ) ) )
442 
443 
444 /*
445 	convert signed integer to fixpoint
446 */
s32_to_fixPoint(const s32 x)447 inline tFixPoint s32_to_fixPoint (const s32 x)
448 {
449 	return x << FIX_POINT_PRE;
450 }
451 
u32_to_fixPoint(const u32 x)452 inline tFixPointu u32_to_fixPoint (const u32 x)
453 {
454 	return x << FIX_POINT_PRE;
455 }
456 
fixPointu_to_u32(const tFixPointu x)457 inline u32 fixPointu_to_u32 (const tFixPointu x)
458 {
459 	return x >> FIX_POINT_PRE;
460 }
461 
462 
463 // 1/x * FIX_POINT
fix_inverse32(const f32 x)464 REALINLINE f32 fix_inverse32 ( const f32 x )
465 {
466 	return FIX_POINT_F32_MUL / x;
467 }
468 
469 
470 /*
471 	convert float to fixpoint
472 	fast convert (fistp on x86) HAS to be used..
473 	hints: compileflag /QIfist for msvc7. msvc 8.0 has smth different
474 	others should use their favourite assembler..
475 */
f_round2(f32 f)476 static inline int f_round2(f32 f)
477 {
478 	f += (3<<22);
479 	return IR(f) - 0x4b400000;
480 }
481 
482 /*
483 	convert f32 to Fix Point.
484 	multiply is needed anyway, so scale mulby
485 */
486 REALINLINE tFixPoint tofix (const f32 x, const f32 mulby = FIX_POINT_F32_MUL )
487 {
488 	return (tFixPoint) (x * mulby);
489 }
490 
491 
492 /*
493 	Fix Point , Fix Point Multiply
494 */
imulFixu(const tFixPointu x,const tFixPointu y)495 REALINLINE tFixPointu imulFixu(const tFixPointu x, const tFixPointu y)
496 {
497 	return (x * y) >> (tFixPointu) FIX_POINT_PRE;
498 }
499 
500 /*
501 	Fix Point , Fix Point Multiply
502 */
imulFix(const tFixPoint x,const tFixPoint y)503 REALINLINE tFixPoint imulFix(const tFixPoint x, const tFixPoint y)
504 {
505 	return ( x * y) >> ( FIX_POINT_PRE );
506 }
507 
508 /*
509 	Fix Point , Fix Point Multiply x * y * 2
510 */
imulFix2(const tFixPoint x,const tFixPoint y)511 REALINLINE tFixPoint imulFix2(const tFixPoint x, const tFixPoint y)
512 {
513 	return ( x * y) >> ( FIX_POINT_PRE -1 );
514 }
515 
516 
517 /*
518 	Multiply x * y * 1
519 */
imulFix_tex1(const tFixPoint x,const tFixPoint y)520 REALINLINE tFixPoint imulFix_tex1(const tFixPoint x, const tFixPoint y)
521 {
522 	return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 4 );
523 }
524 
525 /*
526 	Multiply x * y * 2
527 */
imulFix_tex2(const tFixPoint x,const tFixPoint y)528 REALINLINE tFixPoint imulFix_tex2(const tFixPoint x, const tFixPoint y)
529 {
530 	return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 3 );
531 }
532 
533 /*
534 	Multiply x * y * 4
535 */
imulFix_tex4(const tFixPoint x,const tFixPoint y)536 REALINLINE tFixPoint imulFix_tex4(const tFixPoint x, const tFixPoint y)
537 {
538 #ifdef SOFTWARE_DRIVER_2_32BIT
539 	return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 2 );
540 #else
541 	return ( x * y) >> ( FIX_POINT_PRE + ( VIDEO_SAMPLE_GRANULARITY * 3 ) );
542 #endif
543 }
544 
545 /*!
546 	clamp FixPoint to maxcolor in FixPoint, min(a,31)
547 */
clampfix_maxcolor(const tFixPoint a)548 REALINLINE tFixPoint clampfix_maxcolor ( const tFixPoint a)
549 {
550 	tFixPoint c = (a - FIXPOINT_COLOR_MAX) >> 31;
551 	return (a & c) | ( FIXPOINT_COLOR_MAX & ~c);
552 }
553 
554 /*!
555 	clamp FixPoint to 0 in FixPoint, max(a,0)
556 */
clampfix_mincolor(const tFixPoint a)557 REALINLINE tFixPoint clampfix_mincolor ( const tFixPoint a)
558 {
559 	return a - ( a & ( a >> 31 ) );
560 }
561 
saturateFix(const tFixPoint a)562 REALINLINE tFixPoint saturateFix ( const tFixPoint a)
563 {
564 	return clampfix_mincolor ( clampfix_maxcolor ( a ) );
565 }
566 
567 
568 // rount fixpoint to int
roundFix(const tFixPoint x)569 inline s32 roundFix ( const tFixPoint x )
570 {
571 	return ( x + FIX_POINT_ZERO_DOT_FIVE ) >> FIX_POINT_PRE;
572 }
573 
574 
575 
576 // x in [0;1[
f32_to_23Bits(const f32 x)577 inline s32 f32_to_23Bits(const f32 x)
578 {
579     f32 y = x + 1.f;
580     return IR(y) & 0x7FFFFF;	// last 23 bits
581 }
582 
583 /*!
584 	return VideoSample from fixpoint
585 */
fix_to_color(const tFixPoint r,const tFixPoint g,const tFixPoint b)586 REALINLINE tVideoSample fix_to_color ( const tFixPoint r, const tFixPoint g, const tFixPoint b )
587 {
588 #ifdef __BIG_ENDIAN__
589 	return	FIXPOINT_COLOR_MAX |
590 			( r & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - 8) |
591 			( g & FIXPOINT_COLOR_MAX) << ( 16 - FIX_POINT_PRE ) |
592 			( b & FIXPOINT_COLOR_MAX) << ( 24 - FIX_POINT_PRE );
593 #else
594 	return	( FIXPOINT_COLOR_MAX & FIXPOINT_COLOR_MAX) << ( SHIFT_A - FIX_POINT_PRE ) |
595 			( r & FIXPOINT_COLOR_MAX) << ( SHIFT_R - FIX_POINT_PRE ) |
596 			( g & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_G ) |
597 			( b & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_B );
598 #endif
599 }
600 
601 
602 /*!
603 	return VideoSample from fixpoint
604 */
fix4_to_color(const tFixPoint a,const tFixPoint r,const tFixPoint g,const tFixPoint b)605 REALINLINE tVideoSample fix4_to_color ( const tFixPoint a, const tFixPoint r, const tFixPoint g, const tFixPoint b )
606 {
607 #ifdef __BIG_ENDIAN__
608 	return	( a & (FIX_POINT_FRACT_MASK - 1 )) >> ( FIX_POINT_PRE ) |
609 			( r & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - 8) |
610 			( g & FIXPOINT_COLOR_MAX) << ( 16 - FIX_POINT_PRE ) |
611 			( b & FIXPOINT_COLOR_MAX) << ( 24 - FIX_POINT_PRE );
612 #else
613 	return	( a & (FIX_POINT_FRACT_MASK - 1 )) << ( SHIFT_A - 1 ) |
614 			( r & FIXPOINT_COLOR_MAX) << ( SHIFT_R - FIX_POINT_PRE ) |
615 			( g & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_G ) |
616 			( b & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_B );
617 #endif
618 
619 }
620 
621 /*!
622 	return fixpoint from VideoSample granularity COLOR_MAX
623 */
color_to_fix(tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)624 inline void color_to_fix ( tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
625 {
626 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE );
627 	(tFixPointu&) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
628 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
629 }
630 
631 /*!
632 	return fixpoint from VideoSample granularity COLOR_MAX
633 */
color_to_fix(tFixPoint & a,tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)634 inline void color_to_fix ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
635 {
636 	(tFixPointu&) a	 =	(t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE );
637 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE );
638 	(tFixPointu&) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
639 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
640 }
641 
642 /*!
643 	return fixpoint from VideoSample granularity 0..FIX_POINT_ONE
644 */
color_to_fix1(tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)645 inline void color_to_fix1 ( tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
646 {
647 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R + COLOR_MAX_LOG2 - FIX_POINT_PRE );
648 	(tFixPointu&) g	 =	(t00 & MASK_G) >> ( SHIFT_G + COLOR_MAX_LOG2 - FIX_POINT_PRE );
649 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - COLOR_MAX_LOG2 );
650 }
651 
652 /*!
653 	return fixpoint from VideoSample granularity 0..FIX_POINT_ONE
654 */
color_to_fix1(tFixPoint & a,tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)655 inline void color_to_fix1 ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
656 {
657 	(tFixPointu&) a	 =	(t00 & MASK_A) >> ( SHIFT_A + COLOR_MAX_LOG2 - FIX_POINT_PRE );
658 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R + COLOR_MAX_LOG2 - FIX_POINT_PRE );
659 	(tFixPointu&) g	 =	(t00 & MASK_G) >> ( SHIFT_G + COLOR_MAX_LOG2 - FIX_POINT_PRE );
660 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - COLOR_MAX_LOG2 );
661 }
662 
663 
664 
// ----- FP24 ---- floating point z-buffer
// The enabled branch makes fp24 a plain f32. The disabled branch keeps an
// alternative that stores only the low 23 bits of the bit pattern of f + 1.

#if 1
typedef f32 fp24;
#else
struct fp24
{
	// low 23 bits of the bit pattern of ( value + 1.f )
	u32 v;

	// leaves v uninitialized
	fp24() {}

	// store f: add 1.f and keep the low 23 bits of the bit pattern
	fp24 ( const f32 f )
	{
	    f32 y = f + 1.f;
	    v = ((u32&)y) & 0x7FFFFF;	// last 23 bits
	}

	// same conversion as the converting constructor
	void operator=(const f32 f )
	{
	    f32 y = f + 1.f;
	    v = ((u32&)y) & 0x7FFFFF;	// last 23 bits
	}

	// adds the stored bit fields as integers
	void operator+=(const fp24 &other )
	{
		v += other.v;
	}

	// reinterprets v as a float through FR and adds 1.f
	operator f32 () const
	{
		f32 r = FR ( v );
		return r + 1.f;
	}

};
#endif
701 
702 
// ------------------------ Internal Texture -----------------------------

// Texture description used by the texel fetch helpers below.
struct sInternalTexture
{
	// masks applied to the fixed-point texture coordinates before they are
	// turned into an offset
	u32 textureXMask;
	u32 textureYMask;

	// the row index is shifted left by this amount to get the byte offset
	// of the row
	u32 pitchlog2;
	// start of the texel memory; addressed with byte offsets
	void *data;

//	video::CSoftwareTexture2 *Texture;
	// not read by the helpers in this file; presumably the mip level
	// described by this entry - TODO confirm
	s32 lodLevel;
};
716 
717 
718 
719 // get video sample plain
getTexel_plain(const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)720 inline tVideoSample getTexel_plain ( const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty )
721 {
722 	u32 ofs;
723 
724 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
725 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
726 
727 	// texel
728 	return *((tVideoSample*)( (u8*) t->data + ofs ));
729 }
730 
731 // get video sample to fix
getTexel_fix(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)732 inline void getTexel_fix ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
733 						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
734 								)
735 {
736 	u32 ofs;
737 
738 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
739 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
740 
741 	// texel
742 	tVideoSample t00;
743 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
744 
745 	r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
746 	g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
747 	b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
748 
749 }
750 
751 // get video sample to fixpoint
getTexel_fix(tFixPoint & a,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)752 REALINLINE void getTexel_fix ( tFixPoint &a,
753 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
754 								)
755 {
756 	u32 ofs;
757 
758 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
759 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
760 
761 	// texel
762 	tVideoSample t00;
763 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
764 
765 	a	 =	(t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE);
766 }
767 
768 
getSample_texture_dither(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty,const u32 x,const u32 y)769 inline void getSample_texture_dither (	tFixPoint &r, tFixPoint &g, tFixPoint &b,
770 										const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty,
771 										const u32 x, const u32 y
772 								)
773 {
774 	static const tFixPointu dithermask[] =
775 	{
776 		0x00,0x80,0x20,0xa0,
777 		0xc0,0x40,0xe0,0x60,
778 		0x30,0xb0,0x10,0x90,
779 		0xf0,0x70,0xd0,0x50
780 	};
781 
782 	const u32 index = (y & 3 ) << 2 | (x & 3);
783 
784 	const tFixPointu _ntx = (tx + dithermask [ index ] ) & t->textureXMask;
785 	const tFixPointu _nty = (ty + dithermask [ index ] ) & t->textureYMask;
786 
787 	u32 ofs;
788 	ofs = ( ( _nty ) >> FIX_POINT_PRE ) << t->pitchlog2;
789 	ofs |= ( _ntx ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
790 
791 	// texel
792 	const tVideoSample t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
793 
794 	(tFixPointu &) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
795 	(tFixPointu &) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
796 	(tFixPointu &) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
797 
798 }
799 
800 /*
801 	load a sample from internal texture at position tx,ty to fixpoint
802 */
803 #ifndef SOFTWARE_DRIVER_2_BILINEAR
804 
805 // get Sample linear == getSample_fixpoint
806 
getSample_texture(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)807 inline void getSample_texture ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
808 						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
809 								)
810 {
811 	u32 ofs;
812 
813 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
814 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
815 
816 	// texel
817 	const tVideoSample t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
818 
819 	(tFixPointu &) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
820 	(tFixPointu &) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
821 	(tFixPointu &) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
822 }
823 
getSample_texture(tFixPointu & a,tFixPointu & r,tFixPointu & g,tFixPointu & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)824 inline void getSample_texture ( tFixPointu &a, tFixPointu &r, tFixPointu &g, tFixPointu &b,
825 						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
826 								)
827 {
828 	u32 ofs;
829 
830 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
831 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
832 
833 	// texel
834 	const tVideoSample t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
835 
836 	(tFixPointu &)a	 =	(t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE);
837 	(tFixPointu &)r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
838 	(tFixPointu &)g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
839 	(tFixPointu &)b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
840 }
841 
842 
843 #else
844 
845 
846 // get sample linear
getSample_linear(tFixPointu & r,tFixPointu & g,tFixPointu & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)847 REALINLINE void getSample_linear ( tFixPointu &r, tFixPointu &g, tFixPointu &b,
848 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
849 								)
850 {
851 	u32 ofs;
852 
853 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
854 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
855 
856 	// texel
857 	tVideoSample t00;
858 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
859 
860 	r	 =	(t00 & MASK_R) >> SHIFT_R;
861 	g	 =	(t00 & MASK_G) >> SHIFT_G;
862 	b	 =	(t00 & MASK_B);
863 }
864 
865 // get Sample bilinear
getSample_texture(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)866 REALINLINE void getSample_texture ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
867 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
868 								)
869 {
870 
871 	tFixPointu r00,g00,b00;
872 	tFixPointu r01,g01,b01;
873 	tFixPointu r10,g10,b10;
874 	tFixPointu r11,g11,b11;
875 
876 #if 0
877 	getSample_linear ( r00, g00, b00, t, tx,ty );
878 	getSample_linear ( r10, g10, b10, t, tx + FIX_POINT_ONE,ty );
879 	getSample_linear ( r01, g01, b01, t, tx,ty + FIX_POINT_ONE );
880 	getSample_linear ( r11, g11, b11, t, tx + FIX_POINT_ONE,ty + FIX_POINT_ONE );
881 #else
882 	u32 o0, o1,o2,o3;
883 	tVideoSample t00;
884 
885 	o0 = ( ( (ty) & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
886 	o1 = ( ( (ty+FIX_POINT_ONE) & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
887 	o2 =   ( (tx) & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
888 	o3 =   ( (tx+FIX_POINT_ONE) & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
889 
890 	t00 = *((tVideoSample*)( (u8*) t->data + (o0 | o2 ) ));
891 	r00	 =	(t00 & MASK_R) >> SHIFT_R; g00  =	(t00 & MASK_G) >> SHIFT_G; b00	 =	(t00 & MASK_B);
892 
893 	t00 = *((tVideoSample*)( (u8*) t->data + (o0 | o3 ) ));
894 	r10	 =	(t00 & MASK_R) >> SHIFT_R; g10  =	(t00 & MASK_G) >> SHIFT_G; b10	 =	(t00 & MASK_B);
895 
896 	t00 = *((tVideoSample*)( (u8*) t->data + (o1 | o2 ) ));
897 	r01	 =	(t00 & MASK_R) >> SHIFT_R; g01  =	(t00 & MASK_G) >> SHIFT_G; b01	 =	(t00 & MASK_B);
898 
899 	t00 = *((tVideoSample*)( (u8*) t->data + (o1 | o3 ) ));
900 	r11	 =	(t00 & MASK_R) >> SHIFT_R; g11  =	(t00 & MASK_G) >> SHIFT_G; b11	 =	(t00 & MASK_B);
901 
902 #endif
903 
904 	const tFixPointu txFract = tx & FIX_POINT_FRACT_MASK;
905 	const tFixPointu txFractInv = FIX_POINT_ONE - txFract;
906 
907 	const tFixPointu tyFract = ty & FIX_POINT_FRACT_MASK;
908 	const tFixPointu tyFractInv = FIX_POINT_ONE - tyFract;
909 
910 	const tFixPointu w00 = imulFixu ( txFractInv, tyFractInv );
911 	const tFixPointu w10 = imulFixu ( txFract	, tyFractInv );
912 	const tFixPointu w01 = imulFixu ( txFractInv, tyFract );
913 	const tFixPointu w11 = imulFixu ( txFract	, tyFract );
914 
915 	r =		(r00 * w00 ) +
916 			(r01 * w01 ) +
917 			(r10 * w10 ) +
918 			(r11 * w11 );
919 
920 	g =		(g00 * w00 ) +
921 			(g01 * w01 ) +
922 			(g10 * w10 ) +
923 			(g11 * w11 );
924 
925 	b =		(b00 * w00 ) +
926 			(b01 * w01 ) +
927 			(b10 * w10 ) +
928 			(b11 * w11 );
929 
930 }
931 
932 
933 // get sample linear
getSample_linear(tFixPointu & a,tFixPointu & r,tFixPointu & g,tFixPointu & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)934 REALINLINE void getSample_linear ( tFixPointu &a, tFixPointu &r, tFixPointu &g, tFixPointu &b,
935 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
936 								)
937 {
938 	u32 ofs;
939 
940 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
941 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
942 
943 	// texel
944 	tVideoSample t00;
945 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
946 
947 	a	 =	(t00 & MASK_A) >> SHIFT_A;
948 	r	 =	(t00 & MASK_R) >> SHIFT_R;
949 	g	 =	(t00 & MASK_G) >> SHIFT_G;
950 	b	 =	(t00 & MASK_B);
951 }
952 
953 // get Sample bilinear
getSample_texture(tFixPoint & a,tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)954 REALINLINE void getSample_texture ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b,
955 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
956 								)
957 {
958 
959 	tFixPointu a00, r00,g00,b00;
960 	tFixPointu a01, r01,g01,b01;
961 	tFixPointu a10, r10,g10,b10;
962 	tFixPointu a11, r11,g11,b11;
963 
964 	getSample_linear ( a00, r00, g00, b00, t, tx,ty );
965 	getSample_linear ( a10, r10, g10, b10, t, tx + FIX_POINT_ONE,ty );
966 	getSample_linear ( a01, r01, g01, b01, t, tx,ty + FIX_POINT_ONE );
967 	getSample_linear ( a11, r11, g11, b11, t, tx + FIX_POINT_ONE,ty + FIX_POINT_ONE );
968 
969 	const tFixPointu txFract = tx & FIX_POINT_FRACT_MASK;
970 	const tFixPointu txFractInv = FIX_POINT_ONE - txFract;
971 
972 	const tFixPointu tyFract = ty & FIX_POINT_FRACT_MASK;
973 	const tFixPointu tyFractInv = FIX_POINT_ONE - tyFract;
974 
975 	const tFixPointu w00 = imulFixu ( txFractInv, tyFractInv );
976 	const tFixPointu w10 = imulFixu ( txFract	, tyFractInv );
977 	const tFixPointu w01 = imulFixu ( txFractInv, tyFract );
978 	const tFixPointu w11 = imulFixu ( txFract	, tyFract );
979 
980 	a =		(a00 * w00 ) +
981 			(a01 * w01 ) +
982 			(a10 * w10 ) +
983 			(a11 * w11 );
984 
985 	r =		(r00 * w00 ) +
986 			(r01 * w01 ) +
987 			(r10 * w10 ) +
988 			(r11 * w11 );
989 
990 	g =		(g00 * w00 ) +
991 			(g01 * w01 ) +
992 			(g10 * w10 ) +
993 			(g11 * w11 );
994 
995 	b =		(b00 * w00 ) +
996 			(b01 * w01 ) +
997 			(b10 * w10 ) +
998 			(b11 * w11 );
999 
1000 }
1001 
1002 
1003 #endif
1004 
// some 2D Defines
// Rectangle given by two corner points in absolute coordinates.
// intersect() treats x0/y0 as the lower and x1/y1 as the upper bounds.
struct AbsRectangle
{
	s32 x0;
	s32 y0;
	s32 x1;
	s32 y1;
};
1013 
1014 //! 2D Intersection test
intersect(AbsRectangle & dest,const AbsRectangle & a,const AbsRectangle & b)1015 inline bool intersect ( AbsRectangle &dest, const AbsRectangle& a, const AbsRectangle& b)
1016 {
1017 	dest.x0 = core::s32_max( a.x0, b.x0 );
1018 	dest.y0 = core::s32_max( a.y0, b.y0 );
1019 	dest.x1 = core::s32_min( a.x1, b.x1 );
1020 	dest.y1 = core::s32_min( a.y1, b.y1 );
1021 	return dest.x0 < dest.x1 && dest.y0 < dest.y1;
1022 }
1023 
// some 1D defines
// Interval on one axis, given by its start and its end.
struct sIntervall
{
	s32 start;
	s32 end;
};
1030 
1031 // returning intersection width
intervall_intersect_test(const sIntervall & a,const sIntervall & b)1032 inline s32 intervall_intersect_test( const sIntervall& a, const sIntervall& b)
1033 {
1034 	return core::s32_min( a.end, b.end ) - core::s32_max( a.start, b.start );
1035 }
1036 
1037 
1038 } // end namespace irr
1039 
1040 #endif
1041 
1042