1 /* sms_ntsc 0.2.3. http://www.slack.net/~ant/ */
2
3 /* Common implementation of NTSC filters */
4
5 #include <assert.h>
6 #include <math.h>
7
8 /* Copyright (C) 2006 Shay Green. This module is free software; you
9 can redistribute it and/or modify it under the terms of the GNU Lesser
10 General Public License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version. This
12 module is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15 details. You should have received a copy of the GNU Lesser General Public
16 License along with this module; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
18
/* Set to 1 to replace the error-correction macros defined later in this file
(CORRECT_ERROR / DISTRIBUTE_ERROR) with versions that only add rgb_bias. */
#define DISABLE_CORRECTION 0

/* Single-precision pi; any earlier definition of PI is discarded. */
#undef PI
#define PI 3.14159265358979323846f

/* Defaults for tunables that may already have been defined before this point. */
#if !defined( LUMA_CUTOFF )
    #define LUMA_CUTOFF 0.20
#endif

#if !defined( gamma_size )
    #define gamma_size 1
#endif

#if !defined( rgb_bits )
    #define rgb_bits 8
#endif

/* artifacts_mid and fringing_mid are not defined in this file. */
#if !defined( artifacts_max )
    #define artifacts_max (artifacts_mid * 1.5f)
#endif

#if !defined( fringing_max )
    #define fringing_max (fringing_mid * 2)
#endif

/* Decides whether init() applies the standard decoder hue when the default
decoder matrix is in use; always true unless overridden. */
#if !defined( STD_HUE_CONDITION )
    #define STD_HUE_CONDITION( setup ) 1
#endif

/* Derived constants. std_decoder_hue is not defined in this file. */
#define ext_decoder_hue     (std_decoder_hue + 15)
#define rgb_unit            (1 << rgb_bits)
#define rgb_offset          (rgb_unit * 2 + 0.5f)
47 enum { burst_size = sms_ntsc_entry_size / burst_count };
48 enum { kernel_half = 16 };
49 enum { kernel_size = kernel_half * 2 + 1 };
50
51 typedef struct init_t
52 {
53 float to_rgb [burst_count * 6];
54 float to_float [gamma_size];
55 float contrast;
56 float brightness;
57 float artifacts;
58 float fringing;
59 float kernel [rescale_out * kernel_size * 2];
60 } init_t;
61
/* Rotates the pair (i, q) in place using the given sine and cosine:
       i' = i * cos_b - q * sin_b
       q' = i * sin_b + q * cos_b
i and q must be modifiable float lvalues. sin_b and cos_b may be arbitrary
expressions (they are parenthesized here) and are each evaluated twice.
Expands to a brace-enclosed block, so it can be followed by an optional ';'.
The temporary has a macro-specific name so it cannot capture an argument. */
#define ROTATE_IQ( i, q, sin_b, cos_b ) {\
    float rotate_iq_tmp_;\
    rotate_iq_tmp_ = (i) * (cos_b) - (q) * (sin_b);\
    (q) = (i) * (sin_b) + (q) * (cos_b);\
    (i) = rotate_iq_tmp_;\
}
68
/* Builds the filter kernels selected by setup->sharpness, setup->resolution
and setup->bleed and stores them in impl->kernel.

One kernel set is kernel_size * 2 floats: the chroma (iq) gaussian in the
first kernel_size entries, the luma (y) windowed sinc in the next kernel_size.
When rescale_out > 1 the set is built in a local buffer and then expanded into
rescale_out linearly weighted copies in impl->kernel; otherwise it is built
directly in impl->kernel. */
static void init_filters( init_t* impl, sms_ntsc_setup_t const* setup )
{
#if rescale_out > 1
    float kernels [kernel_size * 2];
#else
    float* const kernels = impl->kernel;
#endif
    float* const chroma = kernels;
    /* first luma tap; the luma center tap is kernels [kernel_size * 3 / 2] */
    float* const luma = &kernels [kernel_size * 3 / 2 - kernel_half];

    /* luma (y): sinc with rolloff (dsf), blackman window, unit sum */
    {
        float const rolloff = 1 + (float) setup->sharpness * (float) 0.032;
        float const maxh = 32;
        float const pow_a_n = (float) pow( rolloff, maxh );
        /* quadratic mapping to reduce negative (blurring) range */
        float const res = (float) setup->resolution + 1;
        float const to_angle = PI / maxh * (float) LUMA_CUTOFF * (res * res + 1);
        float total = 0;
        float scale;
        int tap;

        luma [kernel_half] = maxh; /* center value kept when the formula is skipped */
        for ( tap = 0; tap < kernel_size; tap++ )
        {
            int const x = tap - kernel_half;
            float const angle = x * to_angle;

            /* the formula is unstable at the center tap when rolloff is very
            close to 1.0; keep the default center value in that case */
            if ( !x && !(pow_a_n > (float) 1.056) && !(pow_a_n < (float) 0.981) )
                continue;

            {
                float const rolloff_cos_a = rolloff * (float) cos( angle );
                float const num = 1 - rolloff_cos_a -
                        pow_a_n * (float) cos( maxh * angle ) +
                        pow_a_n * rolloff * (float) cos( (maxh - 1) * angle );
                float const den = 1 - rolloff_cos_a - rolloff_cos_a + rolloff * rolloff;
                float const dsf = num / den;
                luma [tap] = dsf - (float) 0.5;
            }
        }

        /* apply blackman window and accumulate the sum of the windowed taps */
        for ( tap = 0; tap < kernel_size; tap++ )
        {
            float const phase = PI * 2 / (kernel_half * 2) * tap;
            float const blackman = 0.42f - 0.5f * (float) cos( phase ) + 0.08f * (float) cos( phase * 2 );
            luma [tap] *= blackman;
            total += luma [tap];
        }

        /* scale so the taps sum to one */
        scale = 1.0f / total;
        for ( tap = 0; tap < kernel_size; tap++ )
        {
            luma [tap] *= scale;
            assert( luma [tap] == luma [tap] ); /* fails on NaN: numerical instability */
        }
    }

    /* chroma (iq): gaussian */
    {
        float const cutoff_factor = -0.03125f;
        float cutoff = (float) setup->bleed;
        int parity;
        int tap;

        if ( cutoff < 0 )
        {
            /* negative bleed: raise to the 8th power (three squarings), then
            scale by -30 / 0.65 */
            cutoff *= cutoff;
            cutoff *= cutoff;
            cutoff *= cutoff;
            cutoff *= -30.0f / 0.65f;
        }
        cutoff = cutoff_factor - 0.65f * cutoff_factor * cutoff;

        for ( tap = 0; tap < kernel_size; tap++ )
        {
            int const x = tap - kernel_half;
            chroma [tap] = (float) exp( x * x * cutoff );
        }

        /* normalize even and odd taps separately, each to unit sum */
        for ( parity = 0; parity < 2; parity++ )
        {
            float total = 0;
            float scale;

            for ( tap = parity; tap < kernel_size; tap += 2 )
                total += chroma [tap];

            scale = 1.0f / total;
            for ( tap = parity; tap < kernel_size; tap += 2 )
            {
                chroma [tap] *= scale;
                assert( chroma [tap] == chroma [tap] ); /* fails on NaN: numerical instability */
            }
        }
    }

    /* generate linear rescale kernels: each copy blends every tap with its
    predecessor, the weight dropping by 1 / rescale_in per copy */
#if rescale_out > 1
    {
        float weight = 1.0f;
        float* out = impl->kernel;
        int copy;

        for ( copy = rescale_out; copy > 0; copy-- )
        {
            float carry = 0;
            int idx;

            weight -= 1.0f / rescale_in;
            for ( idx = 0; idx < kernel_size * 2; idx++ )
            {
                float const cur = kernels [idx];
                float const scaled = cur * weight;
                *out++ = scaled + carry;
                carry = cur - scaled;
            }
        }
    }
#endif
}
194
/* Decoder matrix init() falls back to when setup->decoder_matrix is null:
one (i, q) coefficient pair for each of red, green and blue, in that order. */
static float const default_decoder [6] =
{
     0.956f,  0.621f,   /* red */
    -0.272f, -0.647f,   /* green */
    -1.105f,  1.702f    /* blue */
};
197
/* Derives all setup-dependent state in *impl:
   - brightness and contrast, scaled to rgb_unit
   - artifacts and fringing levels
   - filter kernels (via init_filters)
   - the gamma table, only when gamma_size > 1
   - one hue/saturation-adjusted decoder matrix per burst phase in impl->to_rgb */
static void init( init_t* impl, sms_ntsc_setup_t const* setup )
{
    float const half_unit = 0.5f * rgb_unit;
    float artifacts = (float) setup->artifacts;
    float fringing = (float) setup->fringing;

    impl->brightness = (float) setup->brightness * half_unit + rgb_offset;
    impl->contrast   = (float) setup->contrast   * half_unit + rgb_unit;
#ifdef default_palette_contrast
    if ( !setup->palette )
        impl->contrast *= default_palette_contrast;
#endif

    /* positive settings are stretched by (max - mid) before being mapped
    around the mid level */
    if ( artifacts > 0 )
        artifacts *= artifacts_max - artifacts_mid;
    impl->artifacts = artifacts * artifacts_mid + artifacts_mid;

    if ( fringing > 0 )
        fringing *= fringing_max - fringing_mid;
    impl->fringing = fringing * fringing_mid + fringing_mid;

    init_filters( impl, setup );

    /* generate gamma table */
    if ( gamma_size > 1 )
    {
        float const step = 1.0f / (gamma_size - (gamma_size > 1));
        /* match common PC's 2.2 gamma to TV's 2.65 gamma */
        float const gamma = 1.1333f - (float) setup->gamma * 0.5f;
        int idx;

        for ( idx = 0; idx < gamma_size; idx++ )
            impl->to_float [idx] =
                    (float) pow( idx * step, gamma ) * impl->contrast + impl->brightness;
    }

    /* set up decoder matrices */
    {
        float hue = (float) setup->hue * PI + PI / 180 * ext_decoder_hue;
        float const sat = (float) setup->saturation + 1;
        float const* decoder = setup->decoder_matrix;
        float* out = impl->to_rgb;
        float s;
        float c;
        int burst;

        if ( !decoder )
        {
            decoder = default_decoder;
            if ( STD_HUE_CONDITION( setup ) )
                hue += PI / 180 * (std_decoder_hue - ext_decoder_hue);
        }

        s = (float) sin( hue ) * sat;
        c = (float) cos( hue ) * sat;

        for ( burst = 0; burst < burst_count; burst++ )
        {
            int row;

            /* rotate each (i, q) coefficient pair of the decoder by the hue */
            for ( row = 0; row < 3; row++ )
            {
                float const i = decoder [row * 2];
                float const q = decoder [row * 2 + 1];
                *out++ = i * c - q * s;
                *out++ = i * s + q * c;
            }

            if ( burst_count <= 1 )
                break;

            ROTATE_IQ( s, c, 0.866025f, -0.5f ); /* +120 degrees */
        }
    }
}
270
271 /* kernel generation */
272
/* Converts r, g, b to YIQ. Stores the luma in y and the first chroma
component in i (both must be lvalues); the value of the whole expression is
the second chroma component, q. r, g and b are each evaluated three times. */
#define RGB_TO_YIQ( r, g, b, y, i ) (\
    (y = (r) * 0.299f + (g) * 0.587f + (b) * 0.114f),\
    (i = (r) * 0.596f - (g) * 0.275f - (b) * 0.321f),\
    ((r) * 0.212f - (g) * 0.523f + (b) * 0.311f)\
)

/* Converts y, i, q to RGB using the six coefficients in to_rgb (an (i, q)
pair for each of r, g, b). Stores red in r and green in g, each converted to
`type`; the value of the whole expression is blue, also converted to `type`.
y, i, q and to_rgb are parenthesized so expression arguments keep their own
precedence; each is evaluated three times. */
#define YIQ_TO_RGB( y, i, q, to_rgb, type, r, g ) (\
    r = (type) ((y) + (to_rgb) [0] * (i) + (to_rgb) [1] * (q)),\
    g = (type) ((y) + (to_rgb) [2] * (i) + (to_rgb) [3] * (q)),\
    (type) ((y) + (to_rgb) [4] * (i) + (to_rgb) [5] * (q))\
)

/* Packs three integer components into one value: r shifted to bit 21,
g to bit 11, b to bit 1. */
#define PACK_RGB( r, g, b ) ((r) << 21 | (g) << 11 | (b) << 1)
286
287 enum { rgb_kernel_size = burst_size / alignment_count };
288 enum { rgb_bias = rgb_unit * 2 * sms_ntsc_rgb_builder };
289
/* Per-alignment parameters consumed by gen_kernel(). */
typedef struct pixel_info_t pixel_info_t;
struct pixel_info_t
{
    int offset;         /* index into init_t.kernel where gen_kernel() starts reading */
    float negate;       /* sign factor; -1 when composite starts at odd multiple of 2 */
    float kernel [4];   /* weights applied to the four i/q/i/q input terms */
};
296
297 #if rescale_in > 1
298 #define PIXEL_OFFSET_( ntsc, scaled ) \
299 (kernel_size / 2 + ntsc + (scaled != 0) + (rescale_out - scaled) % rescale_out + \
300 (kernel_size * 2 * scaled))
301
302 #define PIXEL_OFFSET( ntsc, scaled ) \
303 PIXEL_OFFSET_( ((ntsc) - (scaled) / rescale_out * rescale_in),\
304 (((scaled) + rescale_out * 10) % rescale_out) ),\
305 (1.0f - (((ntsc) + 100) & 2))
306 #else
307 #define PIXEL_OFFSET( ntsc, scaled ) \
308 (kernel_size / 2 + (ntsc) - (scaled)),\
309 (1.0f - (((ntsc) + 100) & 2))
310 #endif
311
312 extern pixel_info_t const sms_ntsc_pixels [alignment_count];
313
314 /* Generate pixel at all burst phases and column alignments */
gen_kernel(init_t * impl,float y,float i,float q,sms_ntsc_rgb_t * out)315 static void gen_kernel( init_t* impl, float y, float i, float q, sms_ntsc_rgb_t* out )
316 {
317 /* generate for each scanline burst phase */
318 float const* to_rgb = impl->to_rgb;
319 int burst_remain = burst_count;
320 y -= rgb_offset;
321 do
322 {
323 /* Encode yiq into *two* composite signals (to allow control over artifacting).
324 Convolve these with kernels which: filter respective components, apply
325 sharpening, and rescale horizontally. Convert resulting yiq to rgb and pack
326 into integer. Based on algorithm by NewRisingSun. */
327 pixel_info_t const* pixel = sms_ntsc_pixels;
328 int alignment_remain = alignment_count;
329 do
330 {
331 /* negate is -1 when composite starts at odd multiple of 2 */
332 float const yy = y * impl->fringing * pixel->negate;
333 float const ic0 = (i + yy) * pixel->kernel [0];
334 float const qc1 = (q + yy) * pixel->kernel [1];
335 float const ic2 = (i - yy) * pixel->kernel [2];
336 float const qc3 = (q - yy) * pixel->kernel [3];
337
338 float const factor = impl->artifacts * pixel->negate;
339 float const ii = i * factor;
340 float const yc0 = (y + ii) * pixel->kernel [0];
341 float const yc2 = (y - ii) * pixel->kernel [2];
342
343 float const qq = q * factor;
344 float const yc1 = (y + qq) * pixel->kernel [1];
345 float const yc3 = (y - qq) * pixel->kernel [3];
346
347 float const* k = &impl->kernel [pixel->offset];
348 int n;
349 ++pixel;
350 for ( n = rgb_kernel_size; n; --n )
351 {
352 float i = k[0]*ic0 + k[2]*ic2;
353 float q = k[1]*qc1 + k[3]*qc3;
354 float y = k[kernel_size+0]*yc0 + k[kernel_size+1]*yc1 +
355 k[kernel_size+2]*yc2 + k[kernel_size+3]*yc3 + rgb_offset;
356 if ( rescale_out <= 1 )
357 k--;
358 else if ( k < &impl->kernel [kernel_size * 2 * (rescale_out - 1)] )
359 k += kernel_size * 2 - 1;
360 else
361 k -= kernel_size * 2 * (rescale_out - 1) + 2;
362 {
363 int r, g, b = YIQ_TO_RGB( y, i, q, to_rgb, int, r, g );
364 *out++ = PACK_RGB( r, g, b ) - rgb_bias;
365 }
366 }
367 }
368 while ( alignment_count > 1 && --alignment_remain );
369
370 if ( burst_count <= 1 )
371 break;
372
373 to_rgb += 6;
374
375 ROTATE_IQ( i, q, -0.866025f, -0.5f ); /* -120 degrees */
376 }
377 while ( --burst_remain );
378 }
379
380 static void correct_errors( sms_ntsc_rgb_t color, sms_ntsc_rgb_t* out );
381
382 #if DISABLE_CORRECTION
383 #define CORRECT_ERROR( a ) { out [i] += rgb_bias; }
384 #define DISTRIBUTE_ERROR( a, b, c ) { out [i] += rgb_bias; }
385 #else
386 #define CORRECT_ERROR( a ) { out [a] += error; }
387 #define DISTRIBUTE_ERROR( a, b, c ) {\
388 sms_ntsc_rgb_t fourth = (error + 2 * sms_ntsc_rgb_builder) >> 2;\
389 fourth &= (rgb_bias >> 1) - sms_ntsc_rgb_builder;\
390 fourth -= rgb_bias >> 2;\
391 out [a] += fourth;\
392 out [b] += fourth;\
393 out [c] += fourth;\
394 out [i] += error - (fourth * 3);\
395 }
396 #endif
397
398 #define RGB_PALETTE_OUT( rgb, out_ )\
399 {\
400 unsigned char* out = (out_);\
401 sms_ntsc_rgb_t clamped = (rgb);\
402 SMS_NTSC_CLAMP_( clamped, (8 - rgb_bits) );\
403 out [0] = (unsigned char) (clamped >> 21);\
404 out [1] = (unsigned char) (clamped >> 11);\
405 out [2] = (unsigned char) (clamped >> 1);\
406 }
407
408 /* blitter related */
409
/* Map `restrict` onto the compiler's spelling unless a macro of that name
already exists. */
#if defined( restrict )
    /* keep the existing definition */
#elif defined( __GNUC__ )
    #define restrict __restrict__
#elif defined( _MSC_VER ) && _MSC_VER > 1300
    #define restrict __restrict
#else
    /* no support for restricted pointers */
    #define restrict
#endif
420
421 #include <limits.h>
422
/* Output pixel type: a 32-bit unsigned integer when SMS_NTSC_OUT_DEPTH is
above 16, otherwise a 16-bit one. Compilation fails if no type of the
required width is found. */
#if SMS_NTSC_OUT_DEPTH > 16
    #if UINT_MAX == 0xFFFFFFFF
        typedef unsigned int sms_ntsc_out_t;
    #elif ULONG_MAX == 0xFFFFFFFF
        typedef unsigned long sms_ntsc_out_t;
    #else
        #error "Need 32-bit int type"
    #endif
#else
    #if USHRT_MAX == 0xFFFF
        typedef unsigned short sms_ntsc_out_t;
    #else
        #error "Need 16-bit int type"
    #endif
#endif
440