1 // Copyright (C) 2002-2012 Nikolaus Gebhardt / Thomas Alten
2 // This file is part of the "Irrlicht Engine".
3 // For conditions of distribution and use, see copyright notice in irrlicht.h
4 
5 /*
6 	History:
7 	- changed behavior for log2 textures ( replaced multiplies by shift )
8 */
9 
10 #ifndef __S_VIDEO_2_SOFTWARE_HELPER_H_INCLUDED__
11 #define __S_VIDEO_2_SOFTWARE_HELPER_H_INCLUDED__
12 
13 #include "SoftwareDriver2_compile_config.h"
14 #include "irrMath.h"
15 #include "CSoftwareTexture2.h"
16 #include "SMaterial.h"
17 
18 
19 
20 namespace irr
21 {
22 
23 // supporting different packed pixel needs many defines...
24 
// Packed pixel format of the software driver, chosen at compile time by
// SOFTWARE_DRIVER_2_32BIT: the sample type plus the mask and bit position of
// every channel inside one packed sample.
#ifdef SOFTWARE_DRIVER_2_32BIT
	// 32 bit samples, 8 bits per channel, alpha in the top byte
	typedef u32	tVideoSample;

	// bits occupied by each channel
	#define	MASK_A	0xFF000000
	#define	MASK_R	0x00FF0000
	#define	MASK_G	0x0000FF00
	#define	MASK_B	0x000000FF

	// position of the lowest bit of each channel
	#define	SHIFT_A	24
	#define	SHIFT_R	16
	#define	SHIFT_G	8
	#define	SHIFT_B	0

	// largest value of one color channel and the number of bits it uses
	#define	COLOR_MAX					0xFF
	#define	COLOR_MAX_LOG2				8
	// sample with every bit set
	#define	COLOR_BRIGHT_WHITE			0xFFFFFFFF

	// log2 of the sample size in bytes (4 bytes per sample);
	// the texel fetch code uses it as a shift to form byte offsets
	#define VIDEO_SAMPLE_GRANULARITY	2

#else
	// 16 bit samples, 1 bit alpha and 5 bits per color channel
	typedef u16	tVideoSample;

	// bits occupied by each channel
	#define	MASK_A	0x8000
	#define	MASK_R	0x7C00
	#define	MASK_G	0x03E0
	#define	MASK_B	0x001F

	// position of the lowest bit of each channel
	#define	SHIFT_A	15
	#define	SHIFT_R	10
	#define	SHIFT_G	5
	#define	SHIFT_B	0

	// largest value of one color channel and the number of bits it uses
	#define	COLOR_MAX					0x1F
	#define	COLOR_MAX_LOG2				5
	// sample with every bit set
	#define	COLOR_BRIGHT_WHITE			0xFFFF
	// log2 of the sample size in bytes (2 bytes per sample)
	#define VIDEO_SAMPLE_GRANULARITY	1

#endif
63 
64 
65 
66 
67 // ----------------------- Generic ----------------------------------
68 
69 //! a more useful memset for pixel
70 // (standard memset only works with 8-bit values)
memset32(void * dest,const u32 value,u32 bytesize)71 inline void memset32(void * dest, const u32 value, u32 bytesize)
72 {
73 	u32 * d = (u32*) dest;
74 
75 	u32 i;
76 
77 	// loops unrolled to reduce the number of increments by factor ~8.
78 	i = bytesize >> (2 + 3);
79 	while (i)
80 	{
81 		d[0] = value;
82 		d[1] = value;
83 		d[2] = value;
84 		d[3] = value;
85 
86 		d[4] = value;
87 		d[5] = value;
88 		d[6] = value;
89 		d[7] = value;
90 
91 		d += 8;
92 		i -= 1;
93 	}
94 
95 	i = (bytesize >> 2 ) & 7;
96 	while (i)
97 	{
98 		d[0] = value;
99 		d += 1;
100 		i -= 1;
101 	}
102 }
103 
104 //! a more useful memset for pixel
105 // (standard memset only works with 8-bit values)
memset16(void * dest,const u16 value,u32 bytesize)106 inline void memset16(void * dest, const u16 value, u32 bytesize)
107 {
108 	u16 * d = (u16*) dest;
109 
110 	u32 i;
111 
112 	// loops unrolled to reduce the number of increments by factor ~8.
113 	i = bytesize >> (1 + 3);
114 	while (i)
115 	{
116 		d[0] = value;
117 		d[1] = value;
118 		d[2] = value;
119 		d[3] = value;
120 
121 		d[4] = value;
122 		d[5] = value;
123 		d[6] = value;
124 		d[7] = value;
125 
126 		d += 8;
127 		--i;
128 	}
129 
130 	i = (bytesize >> 1 ) & 7;
131 	while (i)
132 	{
133 		d[0] = value;
134 		++d;
135 		--i;
136 	}
137 }
138 
139 /*
140 	use biased loop counter
141 	--> 0 byte copy is forbidden
142 */
memcpy32_small(void * dest,const void * source,u32 bytesize)143 REALINLINE void memcpy32_small ( void * dest, const void *source, u32 bytesize )
144 {
145 	u32 c = bytesize >> 2;
146 
147 	do
148 	{
149 		((u32*) dest ) [ c-1 ] = ((u32*) source) [ c-1 ];
150 	} while ( --c );
151 
152 }
153 
154 
155 
156 // integer log2 of a float ieee 754. TODO: non ieee floating point
s32_log2_f32(f32 f)157 static inline s32 s32_log2_f32( f32 f)
158 {
159 	u32 x = IR ( f );
160 	return ((x & 0x7F800000) >> 23) - 127;
161 }
162 
s32_log2_s32(u32 x)163 static inline s32 s32_log2_s32(u32 x)
164 {
165 	return s32_log2_f32( (f32) x);
166 }
167 
s32_abs(s32 x)168 static inline s32 s32_abs(s32 x)
169 {
170 	s32 b = x >> 31;
171 	return (x ^ b ) - b;
172 }
173 
174 
175 //! conditional set based on mask and arithmetic shift
if_mask_a_else_b(const u32 mask,const u32 a,const u32 b)176 REALINLINE u32 if_mask_a_else_b ( const u32 mask, const u32 a, const u32 b )
177 {
178 	return ( mask & ( a ^ b ) ) ^ b;
179 }
180 
181 // ------------------ Video---------------------------------------
182 /*!
183 	Pixel = dest * ( 1 - alpha ) + source * alpha
184 	alpha [0;256]
185 */
PixelBlend32(const u32 c2,const u32 c1,u32 alpha)186 REALINLINE u32 PixelBlend32 ( const u32 c2, const u32 c1, u32 alpha )
187 {
188 	u32 srcRB = c1 & 0x00FF00FF;
189 	u32 srcXG = c1 & 0x0000FF00;
190 
191 	u32 dstRB = c2 & 0x00FF00FF;
192 	u32 dstXG = c2 & 0x0000FF00;
193 
194 
195 	u32 rb = srcRB - dstRB;
196 	u32 xg = srcXG - dstXG;
197 
198 	rb *= alpha;
199 	xg *= alpha;
200 	rb >>= 8;
201 	xg >>= 8;
202 
203 	rb += dstRB;
204 	xg += dstXG;
205 
206 	rb &= 0x00FF00FF;
207 	xg &= 0x0000FF00;
208 
209 	return rb | xg;
210 }
211 
212 /*!
213 	Pixel = dest * ( 1 - alpha ) + source * alpha
214 	alpha [0;32]
215 */
PixelBlend16(const u16 c2,const u32 c1,const u16 alpha)216 inline u16 PixelBlend16 ( const u16 c2, const u32 c1, const u16 alpha )
217 {
218 	const u16 srcRB = c1 & 0x7C1F;
219 	const u16 srcXG = c1 & 0x03E0;
220 
221 	const u16 dstRB = c2 & 0x7C1F;
222 	const u16 dstXG = c2 & 0x03E0;
223 
224 	u32 rb = srcRB - dstRB;
225 	u32 xg = srcXG - dstXG;
226 
227 	rb *= alpha;
228 	xg *= alpha;
229 	rb >>= 5;
230 	xg >>= 5;
231 
232 	rb += dstRB;
233 	xg += dstXG;
234 
235 	rb &= 0x7C1F;
236 	xg &= 0x03E0;
237 
238 	return (u16)(rb | xg);
239 }
240 
241 /*
242 	Pixel = c0 * (c1/31). c0 Alpha retain
243 */
PixelMul16(const u16 c0,const u16 c1)244 inline u16 PixelMul16 ( const u16 c0, const u16 c1)
245 {
246 	return (u16)((( ( (c0 & 0x7C00) * (c1 & 0x7C00) ) & 0x3E000000 ) >> 15 ) |
247 			(( ( (c0 & 0x03E0) * (c1 & 0x03E0) ) & 0x000F8000 ) >> 10 ) |
248 			(( ( (c0 & 0x001F) * (c1 & 0x001F) ) & 0x000003E0 ) >> 5 ) |
249 			(c0 & 0x8000));
250 }
251 
252 /*
253 	Pixel = c0 * (c1/31).
254 */
PixelMul16_2(u16 c0,u16 c1)255 inline u16 PixelMul16_2 ( u16 c0, u16 c1)
256 {
257 	return	(u16)(( ( (c0 & 0x7C00) * (c1 & 0x7C00) ) & 0x3E000000 ) >> 15 |
258 			( ( (c0 & 0x03E0) * (c1 & 0x03E0) ) & 0x000F8000 ) >> 10 |
259 			( ( (c0 & 0x001F) * (c1 & 0x001F) ) & 0x000003E0 ) >> 5  |
260 			( c0 & c1 & 0x8000));
261 }
262 
263 /*
264 	Pixel = c0 * (c1/255). c0 Alpha Retain
265 */
PixelMul32(const u32 c0,const u32 c1)266 REALINLINE u32 PixelMul32 ( const u32 c0, const u32 c1)
267 {
268 	return	(c0 & 0xFF000000) |
269 			(( ( (c0 & 0x00FF0000) >> 12 ) * ( (c1 & 0x00FF0000) >> 12 ) ) & 0x00FF0000 ) |
270 			(( ( (c0 & 0x0000FF00) * (c1 & 0x0000FF00) ) >> 16 ) & 0x0000FF00 ) |
271 			(( ( (c0 & 0x000000FF) * (c1 & 0x000000FF) ) >> 8  ) & 0x000000FF);
272 }
273 
274 /*
275 	Pixel = c0 * (c1/255).
276 */
PixelMul32_2(const u32 c0,const u32 c1)277 REALINLINE u32 PixelMul32_2 ( const u32 c0, const u32 c1)
278 {
279 	return	(( ( (c0 & 0xFF000000) >> 16 ) * ( (c1 & 0xFF000000) >> 16 ) ) & 0xFF000000 ) |
280 			(( ( (c0 & 0x00FF0000) >> 12 ) * ( (c1 & 0x00FF0000) >> 12 ) ) & 0x00FF0000 ) |
281 			(( ( (c0 & 0x0000FF00) * (c1 & 0x0000FF00) ) >> 16 ) & 0x0000FF00 ) |
282 			(( ( (c0 & 0x000000FF) * (c1 & 0x000000FF) ) >> 8  ) & 0x000000FF);
283 }
284 
285 /*
286 	Pixel = clamp ( c0 + c1, 0, 255 )
287 */
PixelAdd32(const u32 c2,const u32 c1)288 REALINLINE u32 PixelAdd32 ( const u32 c2, const u32 c1)
289 {
290 	u32 sum = ( c2 & 0x00FFFFFF )  + ( c1 & 0x00FFFFFF );
291 	u32 low_bits = ( c2 ^ c1 ) & 0x00010101;
292 	s32 carries  = ( sum - low_bits ) & 0x01010100;
293 	u32 modulo = sum - carries;
294 	u32 clamp = carries - ( carries >> 8 );
295 	return modulo | clamp;
296 }
297 
298 #if 0
299 
// 1 - Bit Alpha Blending
// Branching reference version; compiled out by the enclosing "#if 0".
// Returns source when its alpha bit (0x8000) is set, destination otherwise.
inline u16 PixelBlend16 ( const u16 destination, const u16 source )
{
   if((source & 0x8000) == 0x8000)
      return source; // The source is visible, so use it.
   else
      return destination; // The source is transparent, so use the destination.
}
308 
// 1 - Bit Alpha Blending 16Bit SIMD
// Branching reference version; compiled out by the enclosing "#if 0".
// Handles two 16 bit pixels packed into one 32 bit word: for each half the
// source pixel is taken when its alpha bit is set, the destination pixel otherwise.
inline u32 PixelBlend16_simd ( const u32 destination, const u32 source )
{
	// the two alpha bits of the packed source pixels select one of four cases
	switch(source & 0x80008000)
	{
		case 0x80008000: // Both source pixels are visible
			return source;

		case 0x80000000: // Only the first source pixel is visible
			return (source & 0xFFFF0000) | (destination & 0x0000FFFF);

		case 0x00008000: // Only the second source pixel is visible.
			return (destination & 0xFFFF0000) | (source & 0x0000FFFF);

		default: // Neither source pixel is visible.
			return destination;
	}
}
327 #else
328 
329 // 1 - Bit Alpha Blending
PixelBlend16(const u16 c2,const u16 c1)330 inline u16 PixelBlend16 ( const u16 c2, const u16 c1 )
331 {
332 	u16 mask = ((c1 & 0x8000) >> 15 ) + 0x7fff;
333 	return (c2 & mask ) | ( c1 & ~mask );
334 }
335 
336 // 1 - Bit Alpha Blending 16Bit SIMD
PixelBlend16_simd(const u32 c2,const u32 c1)337 inline u32 PixelBlend16_simd ( const u32 c2, const u32 c1 )
338 {
339 	u32 mask = ((c1 & 0x80008000) >> 15 ) + 0x7fff7fff;
340 	return (c2 & mask ) | ( c1 & ~mask );
341 }
342 
343 #endif
344 
345 /*!
346 	Pixel = dest * ( 1 - SourceAlpha ) + source * SourceAlpha
347 */
PixelBlend32(const u32 c2,const u32 c1)348 inline u32 PixelBlend32 ( const u32 c2, const u32 c1 )
349 {
350 	// alpha test
351 	u32 alpha = c1 & 0xFF000000;
352 
353 	if ( 0 == alpha )
354 		return c2;
355 
356 	if ( 0xFF000000 == alpha )
357 	{
358 		return c1;
359 	}
360 
361 	alpha >>= 24;
362 
363 	// add highbit alpha, if ( alpha > 127 ) alpha += 1;
364 	alpha += ( alpha >> 7);
365 
366 	u32 srcRB = c1 & 0x00FF00FF;
367 	u32 srcXG = c1 & 0x0000FF00;
368 
369 	u32 dstRB = c2 & 0x00FF00FF;
370 	u32 dstXG = c2 & 0x0000FF00;
371 
372 
373 	u32 rb = srcRB - dstRB;
374 	u32 xg = srcXG - dstXG;
375 
376 	rb *= alpha;
377 	xg *= alpha;
378 	rb >>= 8;
379 	xg >>= 8;
380 
381 	rb += dstRB;
382 	xg += dstXG;
383 
384 	rb &= 0x00FF00FF;
385 	xg &= 0x0000FF00;
386 
387 	return (c1 & 0xFF000000) | rb | xg;
388 }
389 
390 
391 
392 // ------------------ Fix Point ----------------------------------
393 
394 typedef s32 tFixPoint;
395 typedef u32 tFixPointu;
396 
397 // Fix Point 12
398 #if 0
399 	#define FIX_POINT_PRE			12
400 	#define FIX_POINT_FRACT_MASK	0xFFF
401 	#define FIX_POINT_SIGNED_MASK	0xFFFFF000
402 	#define FIX_POINT_UNSIGNED_MASK	0x7FFFF000
403 	#define FIX_POINT_ONE			0x1000
404 	#define FIX_POINT_ZERO_DOT_FIVE	0x0800
405 	#define FIX_POINT_F32_MUL		4096.f
406 #endif
407 
408 // Fix Point 10
409 #if 1
410 	#define FIX_POINT_PRE			10
411 	#define FIX_POINT_FRACT_MASK	0x3FF
412 	#define FIX_POINT_SIGNED_MASK	0xFFFFFC00
413 	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFE00
414 	#define FIX_POINT_ONE			0x400
415 	#define FIX_POINT_ZERO_DOT_FIVE	0x200
416 	#define FIX_POINT_F32_MUL		1024.f
417 #endif
418 
419 // Fix Point 9
420 #if 0
421 	#define FIX_POINT_PRE			9
422 	#define FIX_POINT_FRACT_MASK	0x1FF
423 	#define FIX_POINT_SIGNED_MASK	0xFFFFFE00
424 	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFE00
425 	#define FIX_POINT_ONE			0x200
426 	#define FIX_POINT_ZERO_DOT_FIVE	0x100
427 	#define FIX_POINT_F32_MUL		512.f
428 #endif
429 
430 // Fix Point 7
431 #if 0
432 	#define FIX_POINT_PRE			7
433 	#define FIX_POINT_FRACT_MASK	0x7F
434 	#define FIX_POINT_SIGNED_MASK	0xFFFFFF80
435 	#define FIX_POINT_UNSIGNED_MASK	0x7FFFFF80
436 	#define FIX_POINT_ONE			0x80
437 	#define FIX_POINT_ZERO_DOT_FIVE	0x40
438 	#define FIX_POINT_F32_MUL		128.f
439 #endif
440 
441 #define	FIXPOINT_COLOR_MAX		( COLOR_MAX << FIX_POINT_PRE )
442 #define FIX_POINT_HALF_COLOR ( (tFixPoint) ( ((f32) COLOR_MAX / 2.f * FIX_POINT_F32_MUL ) ) )
443 
444 
445 /*
446 	convert signed integer to fixpoint
447 */
s32_to_fixPoint(const s32 x)448 inline tFixPoint s32_to_fixPoint (const s32 x)
449 {
450 	return x << FIX_POINT_PRE;
451 }
452 
u32_to_fixPoint(const u32 x)453 inline tFixPointu u32_to_fixPoint (const u32 x)
454 {
455 	return x << FIX_POINT_PRE;
456 }
457 
fixPointu_to_u32(const tFixPointu x)458 inline u32 fixPointu_to_u32 (const tFixPointu x)
459 {
460 	return x >> FIX_POINT_PRE;
461 }
462 
463 
464 // 1/x * FIX_POINT
fix_inverse32(const f32 x)465 REALINLINE f32 fix_inverse32 ( const f32 x )
466 {
467 	return FIX_POINT_F32_MUL / x;
468 }
469 
470 
471 /*
472 	convert float to fixpoint
473 	fast convert (fistp on x86) HAS to be used..
474 	hints: compileflag /QIfist for msvc7. msvc 8.0 has smth different
475 	others should use their favourite assembler..
476 */
f_round2(f32 f)477 static inline int f_round2(f32 f)
478 {
479 	f += (3<<22);
480 	return IR(f) - 0x4b400000;
481 }
482 
483 /*
484 	convert f32 to Fix Point.
485 	multiply is needed anyway, so scale mulby
486 */
487 REALINLINE tFixPoint tofix (const f32 x, const f32 mulby = FIX_POINT_F32_MUL )
488 {
489 	return (tFixPoint) (x * mulby);
490 }
491 
492 
493 /*
494 	Fix Point , Fix Point Multiply
495 */
imulFixu(const tFixPointu x,const tFixPointu y)496 REALINLINE tFixPointu imulFixu(const tFixPointu x, const tFixPointu y)
497 {
498 	return (x * y) >> (tFixPointu) FIX_POINT_PRE;
499 }
500 
501 /*
502 	Fix Point , Fix Point Multiply
503 */
imulFix(const tFixPoint x,const tFixPoint y)504 REALINLINE tFixPoint imulFix(const tFixPoint x, const tFixPoint y)
505 {
506 	return ( x * y) >> ( FIX_POINT_PRE );
507 }
508 
509 /*
510 	Fix Point , Fix Point Multiply x * y * 2
511 */
imulFix2(const tFixPoint x,const tFixPoint y)512 REALINLINE tFixPoint imulFix2(const tFixPoint x, const tFixPoint y)
513 {
514 	return ( x * y) >> ( FIX_POINT_PRE -1 );
515 }
516 
517 
518 /*
519 	Multiply x * y * 1
520 */
imulFix_tex1(const tFixPoint x,const tFixPoint y)521 REALINLINE tFixPoint imulFix_tex1(const tFixPoint x, const tFixPoint y)
522 {
523 	return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 4 );
524 }
525 
526 /*
527 	Multiply x * y * 2
528 */
imulFix_tex2(const tFixPoint x,const tFixPoint y)529 REALINLINE tFixPoint imulFix_tex2(const tFixPoint x, const tFixPoint y)
530 {
531 	return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 3 );
532 }
533 
534 /*
535 	Multiply x * y * 4
536 */
imulFix_tex4(const tFixPoint x,const tFixPoint y)537 REALINLINE tFixPoint imulFix_tex4(const tFixPoint x, const tFixPoint y)
538 {
539 #ifdef SOFTWARE_DRIVER_2_32BIT
540 	return ( ( (tFixPointu) x >> 2 ) * ( (tFixPointu) y >> 2 ) ) >> (tFixPointu) ( FIX_POINT_PRE + 2 );
541 #else
542 	return ( x * y) >> ( FIX_POINT_PRE + ( VIDEO_SAMPLE_GRANULARITY * 3 ) );
543 #endif
544 }
545 
546 /*!
547 	clamp FixPoint to maxcolor in FixPoint, min(a,31)
548 */
clampfix_maxcolor(const tFixPoint a)549 REALINLINE tFixPoint clampfix_maxcolor ( const tFixPoint a)
550 {
551 	tFixPoint c = (a - FIXPOINT_COLOR_MAX) >> 31;
552 	return (a & c) | ( FIXPOINT_COLOR_MAX & ~c);
553 }
554 
555 /*!
556 	clamp FixPoint to 0 in FixPoint, max(a,0)
557 */
clampfix_mincolor(const tFixPoint a)558 REALINLINE tFixPoint clampfix_mincolor ( const tFixPoint a)
559 {
560 	return a - ( a & ( a >> 31 ) );
561 }
562 
saturateFix(const tFixPoint a)563 REALINLINE tFixPoint saturateFix ( const tFixPoint a)
564 {
565 	return clampfix_mincolor ( clampfix_maxcolor ( a ) );
566 }
567 
568 
569 // rount fixpoint to int
roundFix(const tFixPoint x)570 inline s32 roundFix ( const tFixPoint x )
571 {
572 	return ( x + FIX_POINT_ZERO_DOT_FIVE ) >> FIX_POINT_PRE;
573 }
574 
575 
576 
577 // x in [0;1[
f32_to_23Bits(const f32 x)578 inline s32 f32_to_23Bits(const f32 x)
579 {
580     f32 y = x + 1.f;
581     return IR(y) & 0x7FFFFF;	// last 23 bits
582 }
583 
584 /*!
585 	return VideoSample from fixpoint
586 */
fix_to_color(const tFixPoint r,const tFixPoint g,const tFixPoint b)587 REALINLINE tVideoSample fix_to_color ( const tFixPoint r, const tFixPoint g, const tFixPoint b )
588 {
589 #ifdef __BIG_ENDIAN__
590 	return	FIXPOINT_COLOR_MAX |
591 			( r & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - 8) |
592 			( g & FIXPOINT_COLOR_MAX) << ( 16 - FIX_POINT_PRE ) |
593 			( b & FIXPOINT_COLOR_MAX) << ( 24 - FIX_POINT_PRE );
594 #else
595 	return	( FIXPOINT_COLOR_MAX & FIXPOINT_COLOR_MAX) << ( SHIFT_A - FIX_POINT_PRE ) |
596 			( r & FIXPOINT_COLOR_MAX) << ( SHIFT_R - FIX_POINT_PRE ) |
597 			( g & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_G ) |
598 			( b & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_B );
599 #endif
600 }
601 
602 
603 /*!
604 	return VideoSample from fixpoint
605 */
fix4_to_color(const tFixPoint a,const tFixPoint r,const tFixPoint g,const tFixPoint b)606 REALINLINE tVideoSample fix4_to_color ( const tFixPoint a, const tFixPoint r, const tFixPoint g, const tFixPoint b )
607 {
608 #ifdef __BIG_ENDIAN__
609 	return	( a & (FIX_POINT_FRACT_MASK - 1 )) >> ( FIX_POINT_PRE ) |
610 			( r & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - 8) |
611 			( g & FIXPOINT_COLOR_MAX) << ( 16 - FIX_POINT_PRE ) |
612 			( b & FIXPOINT_COLOR_MAX) << ( 24 - FIX_POINT_PRE );
613 #else
614 	return	( a & (FIX_POINT_FRACT_MASK - 1 )) << ( SHIFT_A - 1 ) |
615 			( r & FIXPOINT_COLOR_MAX) << ( SHIFT_R - FIX_POINT_PRE ) |
616 			( g & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_G ) |
617 			( b & FIXPOINT_COLOR_MAX) >> ( FIX_POINT_PRE - SHIFT_B );
618 #endif
619 
620 }
621 
622 /*!
623 	return fixpoint from VideoSample granularity COLOR_MAX
624 */
color_to_fix(tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)625 inline void color_to_fix ( tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
626 {
627 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE );
628 	(tFixPointu&) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
629 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
630 }
631 
632 /*!
633 	return fixpoint from VideoSample granularity COLOR_MAX
634 */
color_to_fix(tFixPoint & a,tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)635 inline void color_to_fix ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
636 {
637 	(tFixPointu&) a	 =	(t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE );
638 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE );
639 	(tFixPointu&) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
640 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
641 }
642 
643 /*!
644 	return fixpoint from VideoSample granularity 0..FIX_POINT_ONE
645 */
color_to_fix1(tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)646 inline void color_to_fix1 ( tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
647 {
648 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R + COLOR_MAX_LOG2 - FIX_POINT_PRE );
649 	(tFixPointu&) g	 =	(t00 & MASK_G) >> ( SHIFT_G + COLOR_MAX_LOG2 - FIX_POINT_PRE );
650 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - COLOR_MAX_LOG2 );
651 }
652 
653 /*!
654 	return fixpoint from VideoSample granularity 0..FIX_POINT_ONE
655 */
color_to_fix1(tFixPoint & a,tFixPoint & r,tFixPoint & g,tFixPoint & b,const tVideoSample t00)656 inline void color_to_fix1 ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b, const tVideoSample t00 )
657 {
658 	(tFixPointu&) a	 =	(t00 & MASK_A) >> ( SHIFT_A + COLOR_MAX_LOG2 - FIX_POINT_PRE );
659 	(tFixPointu&) r	 =	(t00 & MASK_R) >> ( SHIFT_R + COLOR_MAX_LOG2 - FIX_POINT_PRE );
660 	(tFixPointu&) g	 =	(t00 & MASK_G) >> ( SHIFT_G + COLOR_MAX_LOG2 - FIX_POINT_PRE );
661 	(tFixPointu&) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - COLOR_MAX_LOG2 );
662 }
663 
664 
665 
666 // ----- FP24 ---- floating point z-buffer
667 
// Type of a floating point z-buffer value. The enabled branch makes fp24 a
// plain alias of f32; the disabled branch is an alternative struct that keeps
// only the 23 mantissa bits of ( f + 1.f ).
#if 1
typedef f32 fp24;
#else
struct fp24
{
	// low 23 bits of the bit pattern of ( f + 1.f )
	u32 v;

	// leaves v uninitialized
	fp24() {}

	// store f: take the mantissa bits of f + 1.f
	fp24 ( const f32 f )
	{
	    f32 y = f + 1.f;
	    v = ((u32&)y) & 0x7FFFFF;	// last 23 bits
	}

	// same conversion as the constructor above
	void operator=(const f32 f )
	{
	    f32 y = f + 1.f;
	    v = ((u32&)y) & 0x7FFFFF;	// last 23 bits
	}

	// adds the packed values as plain integers
	void operator+=(const fp24 &other )
	{
		v += other.v;
	}

	// reinterprets the stored bits as a float and adds 1.f
	// NOTE(review): this does not look like the inverse of the conversion
	// above - check before enabling this branch
	operator f32 () const
	{
		f32 r = FR ( v );
		return r + 1.f;
	}

};
#endif
702 
703 
704 // ------------------------ Internal Texture -----------------------------
705 
// Description of one texture as used by the texel fetch functions of this file.
struct sInternalTexture
{
	// ANDed with the fixpoint x / y texture coordinate before a texel is addressed
	u32 textureXMask;
	u32 textureYMask;

	// shift applied to the row index to get the byte offset of the row
	u32 pitchlog2;
	// start of the texel data; texels are addressed by byte offset from here
	void *data;

	// not accessed in this file; presumably the texture that owns data - confirm at the users
	video::CSoftwareTexture2 *Texture;
	// not accessed in this file; presumably the mipmap level data refers to - confirm at the users
	s32 lodLevel;
};
717 
718 
719 
720 // get video sample plain
getTexel_plain(const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)721 inline tVideoSample getTexel_plain ( const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty )
722 {
723 	u32 ofs;
724 
725 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
726 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
727 
728 	// texel
729 	return *((tVideoSample*)( (u8*) t->data + ofs ));
730 }
731 
732 // get video sample to fix
getTexel_fix(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)733 inline void getTexel_fix ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
734 						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
735 								)
736 {
737 	u32 ofs;
738 
739 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
740 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
741 
742 	// texel
743 	tVideoSample t00;
744 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
745 
746 	r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
747 	g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
748 	b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
749 
750 }
751 
752 // get video sample to fixpoint
getTexel_fix(tFixPoint & a,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)753 REALINLINE void getTexel_fix ( tFixPoint &a,
754 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
755 								)
756 {
757 	u32 ofs;
758 
759 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
760 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
761 
762 	// texel
763 	tVideoSample t00;
764 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
765 
766 	a	 =	(t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE);
767 }
768 
769 
getSample_texture_dither(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty,const u32 x,const u32 y)770 inline void getSample_texture_dither (	tFixPoint &r, tFixPoint &g, tFixPoint &b,
771 										const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty,
772 										const u32 x, const u32 y
773 								)
774 {
775 	static const tFixPointu dithermask[] =
776 	{
777 		0x00,0x80,0x20,0xa0,
778 		0xc0,0x40,0xe0,0x60,
779 		0x30,0xb0,0x10,0x90,
780 		0xf0,0x70,0xd0,0x50
781 	};
782 
783 	const u32 index = (y & 3 ) << 2 | (x & 3);
784 
785 	const tFixPointu _ntx = (tx + dithermask [ index ] ) & t->textureXMask;
786 	const tFixPointu _nty = (ty + dithermask [ index ] ) & t->textureYMask;
787 
788 	u32 ofs;
789 	ofs = ( ( _nty ) >> FIX_POINT_PRE ) << t->pitchlog2;
790 	ofs |= ( _ntx ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
791 
792 	// texel
793 	const tVideoSample t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
794 
795 	(tFixPointu &) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
796 	(tFixPointu &) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
797 	(tFixPointu &) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
798 
799 }
800 
801 /*
802 	load a sample from internal texture at position tx,ty to fixpoint
803 */
804 #ifndef SOFTWARE_DRIVER_2_BILINEAR
805 
806 // get Sample linear == getSample_fixpoint
807 
getSample_texture(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)808 inline void getSample_texture ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
809 						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
810 								)
811 {
812 	u32 ofs;
813 
814 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
815 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
816 
817 	// texel
818 	const tVideoSample t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
819 
820 	(tFixPointu &) r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
821 	(tFixPointu &) g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
822 	(tFixPointu &) b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
823 }
824 
getSample_texture(tFixPointu & a,tFixPointu & r,tFixPointu & g,tFixPointu & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)825 inline void getSample_texture ( tFixPointu &a, tFixPointu &r, tFixPointu &g, tFixPointu &b,
826 						const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
827 								)
828 {
829 	u32 ofs;
830 
831 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
832 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
833 
834 	// texel
835 	const tVideoSample t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
836 
837 	(tFixPointu &)a	 =	(t00 & MASK_A) >> ( SHIFT_A - FIX_POINT_PRE);
838 	(tFixPointu &)r	 =	(t00 & MASK_R) >> ( SHIFT_R - FIX_POINT_PRE);
839 	(tFixPointu &)g	 =	(t00 & MASK_G) << ( FIX_POINT_PRE - SHIFT_G );
840 	(tFixPointu &)b	 =	(t00 & MASK_B) << ( FIX_POINT_PRE - SHIFT_B );
841 }
842 
843 
844 #else
845 
846 
847 // get sample linear
getSample_linear(tFixPointu & r,tFixPointu & g,tFixPointu & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)848 REALINLINE void getSample_linear ( tFixPointu &r, tFixPointu &g, tFixPointu &b,
849 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
850 								)
851 {
852 	u32 ofs;
853 
854 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
855 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
856 
857 	// texel
858 	tVideoSample t00;
859 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
860 
861 	r	 =	(t00 & MASK_R) >> SHIFT_R;
862 	g	 =	(t00 & MASK_G) >> SHIFT_G;
863 	b	 =	(t00 & MASK_B);
864 }
865 
866 // get Sample bilinear
getSample_texture(tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)867 REALINLINE void getSample_texture ( tFixPoint &r, tFixPoint &g, tFixPoint &b,
868 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
869 								)
870 {
871 
872 	tFixPointu r00,g00,b00;
873 	tFixPointu r01,g01,b01;
874 	tFixPointu r10,g10,b10;
875 	tFixPointu r11,g11,b11;
876 
877 #if 0
878 	getSample_linear ( r00, g00, b00, t, tx,ty );
879 	getSample_linear ( r10, g10, b10, t, tx + FIX_POINT_ONE,ty );
880 	getSample_linear ( r01, g01, b01, t, tx,ty + FIX_POINT_ONE );
881 	getSample_linear ( r11, g11, b11, t, tx + FIX_POINT_ONE,ty + FIX_POINT_ONE );
882 #else
883 	u32 o0, o1,o2,o3;
884 	tVideoSample t00;
885 
886 	o0 = ( ( (ty) & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
887 	o1 = ( ( (ty+FIX_POINT_ONE) & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
888 	o2 =   ( (tx) & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
889 	o3 =   ( (tx+FIX_POINT_ONE) & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
890 
891 	t00 = *((tVideoSample*)( (u8*) t->data + (o0 | o2 ) ));
892 	r00	 =	(t00 & MASK_R) >> SHIFT_R; g00  =	(t00 & MASK_G) >> SHIFT_G; b00	 =	(t00 & MASK_B);
893 
894 	t00 = *((tVideoSample*)( (u8*) t->data + (o0 | o3 ) ));
895 	r10	 =	(t00 & MASK_R) >> SHIFT_R; g10  =	(t00 & MASK_G) >> SHIFT_G; b10	 =	(t00 & MASK_B);
896 
897 	t00 = *((tVideoSample*)( (u8*) t->data + (o1 | o2 ) ));
898 	r01	 =	(t00 & MASK_R) >> SHIFT_R; g01  =	(t00 & MASK_G) >> SHIFT_G; b01	 =	(t00 & MASK_B);
899 
900 	t00 = *((tVideoSample*)( (u8*) t->data + (o1 | o3 ) ));
901 	r11	 =	(t00 & MASK_R) >> SHIFT_R; g11  =	(t00 & MASK_G) >> SHIFT_G; b11	 =	(t00 & MASK_B);
902 
903 #endif
904 
905 	const tFixPointu txFract = tx & FIX_POINT_FRACT_MASK;
906 	const tFixPointu txFractInv = FIX_POINT_ONE - txFract;
907 
908 	const tFixPointu tyFract = ty & FIX_POINT_FRACT_MASK;
909 	const tFixPointu tyFractInv = FIX_POINT_ONE - tyFract;
910 
911 	const tFixPointu w00 = imulFixu ( txFractInv, tyFractInv );
912 	const tFixPointu w10 = imulFixu ( txFract	, tyFractInv );
913 	const tFixPointu w01 = imulFixu ( txFractInv, tyFract );
914 	const tFixPointu w11 = imulFixu ( txFract	, tyFract );
915 
916 	r =		(r00 * w00 ) +
917 			(r01 * w01 ) +
918 			(r10 * w10 ) +
919 			(r11 * w11 );
920 
921 	g =		(g00 * w00 ) +
922 			(g01 * w01 ) +
923 			(g10 * w10 ) +
924 			(g11 * w11 );
925 
926 	b =		(b00 * w00 ) +
927 			(b01 * w01 ) +
928 			(b10 * w10 ) +
929 			(b11 * w11 );
930 
931 }
932 
933 
934 // get sample linear
getSample_linear(tFixPointu & a,tFixPointu & r,tFixPointu & g,tFixPointu & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)935 REALINLINE void getSample_linear ( tFixPointu &a, tFixPointu &r, tFixPointu &g, tFixPointu &b,
936 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
937 								)
938 {
939 	u32 ofs;
940 
941 	ofs = ( ( ty & t->textureYMask ) >> FIX_POINT_PRE ) << t->pitchlog2;
942 	ofs |= ( tx & t->textureXMask ) >> ( FIX_POINT_PRE - VIDEO_SAMPLE_GRANULARITY );
943 
944 	// texel
945 	tVideoSample t00;
946 	t00 = *((tVideoSample*)( (u8*) t->data + ofs ));
947 
948 	a	 =	(t00 & MASK_A) >> SHIFT_A;
949 	r	 =	(t00 & MASK_R) >> SHIFT_R;
950 	g	 =	(t00 & MASK_G) >> SHIFT_G;
951 	b	 =	(t00 & MASK_B);
952 }
953 
954 // get Sample bilinear
getSample_texture(tFixPoint & a,tFixPoint & r,tFixPoint & g,tFixPoint & b,const sInternalTexture * t,const tFixPointu tx,const tFixPointu ty)955 REALINLINE void getSample_texture ( tFixPoint &a, tFixPoint &r, tFixPoint &g, tFixPoint &b,
956 								const sInternalTexture * t, const tFixPointu tx, const tFixPointu ty
957 								)
958 {
959 
960 	tFixPointu a00, r00,g00,b00;
961 	tFixPointu a01, r01,g01,b01;
962 	tFixPointu a10, r10,g10,b10;
963 	tFixPointu a11, r11,g11,b11;
964 
965 	getSample_linear ( a00, r00, g00, b00, t, tx,ty );
966 	getSample_linear ( a10, r10, g10, b10, t, tx + FIX_POINT_ONE,ty );
967 	getSample_linear ( a01, r01, g01, b01, t, tx,ty + FIX_POINT_ONE );
968 	getSample_linear ( a11, r11, g11, b11, t, tx + FIX_POINT_ONE,ty + FIX_POINT_ONE );
969 
970 	const tFixPointu txFract = tx & FIX_POINT_FRACT_MASK;
971 	const tFixPointu txFractInv = FIX_POINT_ONE - txFract;
972 
973 	const tFixPointu tyFract = ty & FIX_POINT_FRACT_MASK;
974 	const tFixPointu tyFractInv = FIX_POINT_ONE - tyFract;
975 
976 	const tFixPointu w00 = imulFixu ( txFractInv, tyFractInv );
977 	const tFixPointu w10 = imulFixu ( txFract	, tyFractInv );
978 	const tFixPointu w01 = imulFixu ( txFractInv, tyFract );
979 	const tFixPointu w11 = imulFixu ( txFract	, tyFract );
980 
981 	a =		(a00 * w00 ) +
982 			(a01 * w01 ) +
983 			(a10 * w10 ) +
984 			(a11 * w11 );
985 
986 	r =		(r00 * w00 ) +
987 			(r01 * w01 ) +
988 			(r10 * w10 ) +
989 			(r11 * w11 );
990 
991 	g =		(g00 * w00 ) +
992 			(g01 * w01 ) +
993 			(g10 * w10 ) +
994 			(g11 * w11 );
995 
996 	b =		(b00 * w00 ) +
997 			(b01 * w01 ) +
998 			(b10 * w10 ) +
999 			(b11 * w11 );
1000 
1001 }
1002 
1003 
1004 #endif
1005 
// some 2D Defines
// Rectangle given by two corner points in absolute coordinates.
// The intersection test of this file treats a rectangle as non-empty only
// when x0 < x1 and y0 < y1.
struct AbsRectangle
{
	// first corner
	s32 x0;
	s32 y0;
	// second corner
	s32 x1;
	s32 y1;
};
1014 
1015 //! 2D Intersection test
intersect(AbsRectangle & dest,const AbsRectangle & a,const AbsRectangle & b)1016 inline bool intersect ( AbsRectangle &dest, const AbsRectangle& a, const AbsRectangle& b)
1017 {
1018 	dest.x0 = core::s32_max( a.x0, b.x0 );
1019 	dest.y0 = core::s32_max( a.y0, b.y0 );
1020 	dest.x1 = core::s32_min( a.x1, b.x1 );
1021 	dest.y1 = core::s32_min( a.y1, b.y1 );
1022 	return dest.x0 < dest.x1 && dest.y0 < dest.y1;
1023 }
1024 
// some 1D defines
// Interval on one axis, given by its two bounds.
struct sIntervall
{
	s32 start;	// lower bound
	s32 end;	// upper bound
};
1031 
1032 // returning intersection width
intervall_intersect_test(const sIntervall & a,const sIntervall & b)1033 inline s32 intervall_intersect_test( const sIntervall& a, const sIntervall& b)
1034 {
1035 	return core::s32_min( a.end, b.end ) - core::s32_max( a.start, b.start );
1036 }
1037 
1038 
1039 } // end namespace irr
1040 
1041 #endif
1042 
1043