1 // license:BSD-3-Clause
2 // copyright-holders:David Haywood
3 /* This is the inner-most loop code (per-pixel) and thus the most performance sensitive part */
4 
5 #if FLIPX == 1
6 #define LOOP_INCREMENTS \
7 			bmp++;  \
8 			gfx2--;
9 #else
10 
11 #define LOOP_INCREMENTS \
12 			bmp++;  \
13 			gfx2++;
14 #endif
15 
16 
17 /*************** REALLY SIMPLE INNER LOOP, NON-BLENDED, NON-TINTED, SIMPLEST CASE ****************/
18 #if REALLY_SIMPLE == 1
19 
20 #if TRANSPARENT == 1
21 			pen = *gfx2;
22 			if (pen & 0x20000000)
23 			{
24 			*bmp = pen;
25 #else
26 			*bmp = *gfx2;
27 #endif
28 
29 /*************** REGULAR INNER LOOPS ****************/
30 #else // NOT REALLY_SIMPLE
31 
32 			pen = *gfx2;
33 
34 #if TRANSPARENT == 1
35 			if (pen & 0x20000000)
36 			{
37 #endif
38 
39 			// convert source to clr
40 			pen_to_clr(pen, &s_clr.trgb);
41 			//s_clr.data = (pen >> 3); // using the union is actually significantly slower than our pen_to_clr to function!
42 			// source * intesity and clamp
43 
44 #if TINT == 1
45 			s_clr.trgb.mul(*tint_clr);
46 #endif
47 
48 			#if BLENDED == 1
49 
50 				// convert destination to clr
51 				pen_to_clr(*bmp, &d_clr.trgb);
52 				//d_clr.data = *bmp >> 3; // using the union is actually significantly slower than our pen_to_clr to function!
53 				#if _SMODE == 0
54 				//g_profiler.start(PROFILER_USER7);
55 
56 					#if _DMODE == 0
57 					//g_profiler.start(PROFILER_USER1);
58 					// this is used extensively in the games (ingame, futari title screens etc.)
59 
60 					s_clr.trgb.r = colrtable_add[salpha_table[(s_clr.trgb.r)]][dalpha_table[(d_clr.trgb.r)]];
61 					s_clr.trgb.g = colrtable_add[salpha_table[(s_clr.trgb.g)]][dalpha_table[(d_clr.trgb.g)]];
62 					s_clr.trgb.b = colrtable_add[salpha_table[(s_clr.trgb.b)]][dalpha_table[(d_clr.trgb.b)]];
63 					#elif _DMODE == 1
64 					//g_profiler.start(PROFILER_USER2);
65 					// futari ~7%
66 					s_clr.trgb.r = colrtable_add[salpha_table[(s_clr.trgb.r)]][colrtable[(s_clr.trgb.r)][(d_clr.trgb.r)]];
67 					s_clr.trgb.g = colrtable_add[salpha_table[(s_clr.trgb.g)]][colrtable[(s_clr.trgb.g)][(d_clr.trgb.g)]];
68 					s_clr.trgb.b = colrtable_add[salpha_table[(s_clr.trgb.b)]][colrtable[(s_clr.trgb.b)][(d_clr.trgb.b)]];
69 					#elif _DMODE == 2
70 					//g_profiler.start(PROFILER_USER3);
71 					clr0.trgb.mul_fixed(s_alpha, s_clr.trgb);
72 					s_clr.trgb.add_with_clr_square(clr0.trgb, d_clr.trgb);
73 					#elif _DMODE == 3
74 					//g_profiler.start(PROFILER_USER4);
75 					clr0.trgb.mul_fixed(s_alpha, s_clr.trgb);
76 					s_clr.trgb.add(clr0.trgb, d_clr.trgb);
77 
78 					#elif _DMODE == 4
79 					//g_profiler.start(PROFILER_USER5);
80 					clr0.trgb.mul_fixed(s_alpha, s_clr.trgb);
81 					s_clr.trgb.add_with_clr_mul_fixed_rev(clr0.trgb, d_alpha, d_clr.trgb);
82 					#elif _DMODE == 5
83 					// futari black character select ~13%
84 					//g_profiler.start(PROFILER_USER6);
85 					s_clr.trgb.r =  colrtable_add[salpha_table[(s_clr.trgb.r)]][colrtable_rev[(s_clr.trgb.r)][(d_clr.trgb.r)]];
86 					s_clr.trgb.g =  colrtable_add[salpha_table[(s_clr.trgb.g)]][colrtable_rev[(s_clr.trgb.g)][(d_clr.trgb.g)]];
87 					s_clr.trgb.b =  colrtable_add[salpha_table[(s_clr.trgb.b)]][colrtable_rev[(s_clr.trgb.b)][(d_clr.trgb.b)]];
88 
89 					#elif _DMODE == 6
90 					//g_profiler.start(PROFILER_USER7);
91 					clr0.trgb.mul_fixed(s_alpha, s_clr.trgb);
92 					s_clr.trgb.add_with_clr_mul_rev_square(clr0.trgb, d_clr.trgb);
93 					#elif _DMODE == 7
94 					//g_profiler.start(PROFILER_USER8);
95 					clr0.trgb.mul_fixed(s_alpha, s_clr.trgb);
96 					s_clr.trgb.add(clr0.trgb, d_clr.trgb);
97 					#endif
98 
99 				//g_profiler.stop();
100 				#elif _SMODE == 1
101 				//g_profiler.start(PROFILER_USER6);
102 				clr0.trgb.square(s_clr.trgb);
103 
104 				#elif _SMODE == 2
105 			//  g_profiler.start(PROFILER_USER4);
106 					#if _DMODE == 0
107 					// this is used heavily on espgal2 highscore screen (~28%) optimized to avoid use of temp clr0 variable
108 					s_clr.trgb.r = colrtable_add[colrtable[(d_clr.trgb.r)][(s_clr.trgb.r)]][dalpha_table[(d_clr.trgb.r)]];
109 					s_clr.trgb.g = colrtable_add[colrtable[(d_clr.trgb.g)][(s_clr.trgb.g)]][dalpha_table[(d_clr.trgb.g)]];
110 					s_clr.trgb.b = colrtable_add[colrtable[(d_clr.trgb.b)][(s_clr.trgb.b)]][dalpha_table[(d_clr.trgb.b)]];
111 					#elif _DMODE == 1
112 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
113 					s_clr.trgb.add_with_clr_mul_3param(clr0.trgb, d_clr.trgb, s_clr.trgb);
114 					#elif _DMODE == 2
115 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
116 					s_clr.trgb.add_with_clr_square(clr0.trgb, d_clr.trgb);
117 					#elif _DMODE == 3
118 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
119 					s_clr.trgb.add(clr0.trgb, d_clr.trgb);
120 
121 					#elif _DMODE == 4
122 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
123 					s_clr.trgb.add_with_clr_mul_fixed_rev(clr0.trgb, d_alpha, d_clr.trgb);
124 					#elif _DMODE == 5
125 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
126 					s_clr.trgb.add_with_clr_mul_rev_3param(clr0.trgb, d_clr.trgb, s_clr.trgb);
127 					#elif _DMODE == 6
128 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
129 					s_clr.trgb.add_with_clr_mul_rev_square(clr0.trgb, d_clr.trgb);
130 					#elif _DMODE == 7
131 					clr0.trgb.mul_3param(s_clr.trgb, d_clr.trgb);
132 					s_clr.trgb.add(clr0.trgb, d_clr.trgb);
133 					#endif
134 				//g_profiler.stop();
135 
136 				#elif _SMODE == 3
137 				//g_profiler.start(PROFILER_USER1);
138 				clr0.trgb.copy(s_clr.trgb);
139 
140 				#elif _SMODE == 4
141 				//g_profiler.start(PROFILER_USER2);
142 				clr0.trgb.mul_fixed_rev(s_alpha, s_clr.trgb);
143 				#elif _SMODE == 5
144 				//g_profiler.start(PROFILER_USER3);
145 				clr0.trgb.mul_rev_square(s_clr.trgb);
146 				#elif _SMODE == 6
147 				//g_profiler.start(PROFILER_USER4);
148 				clr0.trgb.mul_rev_3param(s_clr.trgb, d_clr.trgb);
149 				#elif _SMODE == 7
150 				//g_profiler.start(PROFILER_USER5);
151 				clr0.trgb.copy(s_clr.trgb);
152 				#endif
153 
154 
155 // smode 0/2 cases are already split up and handled above.
156 #if _SMODE != 2
157 #if _SMODE != 0
158 
159 				#if _DMODE == 0
160 				s_clr.trgb.add_with_clr_mul_fixed(clr0.trgb, d_alpha, d_clr.trgb);
161 				#elif _DMODE == 1
162 				s_clr.trgb.add_with_clr_mul_3param(clr0.trgb, d_clr.trgb, s_clr.trgb);
163 				#elif _DMODE == 2
164 				s_clr.trgb.add_with_clr_square(clr0.trgb, d_clr.trgb);
165 				#elif _DMODE == 3
166 				s_clr.trgb.add(clr0.trgb, d_clr.trgb);
167 
168 				#elif _DMODE == 4
169 				s_clr.trgb.add_with_clr_mul_fixed_rev(clr0.trgb, d_alpha, d_clr.trgb);
170 				#elif _DMODE == 5
171 				s_clr.trgb.add_with_clr_mul_rev_3param(clr0.trgb, d_clr.trgb, s_clr.trgb);
172 				#elif _DMODE == 6
173 				s_clr.trgb.add_with_clr_mul_rev_square(clr0.trgb, d_clr.trgb);
174 				#elif _DMODE == 7
175 				s_clr.trgb.add(clr0.trgb, d_clr.trgb);
176 				#endif
177 
178 				//g_profiler.stop();
179 #endif
180 #endif
181 
182 			#endif
183 
184 			// write result
185 			*bmp = s_clr.trgb.to_pen() | (pen & 0x20000000);
186 			//*bmp = (s_clr.data << 3) | (pen & 0x20000000); // using the union is actually significantly slower than our to_pen function!
187 
188 #endif // END NOT REALLY SIMPLE
189 
190 #if TRANSPARENT == 1
191 			}
192 #endif
193 			LOOP_INCREMENTS
194