1 //  ---------------------------------------------------------------------------
2 //  This file is part of reSID, a MOS6581 SID emulator engine.
3 //  Copyright (C) 2004  Dag Lem <resid@nimrod.no>
4 //
5 //  This program is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU General Public License as published by
7 //  the Free Software Foundation; either version 2 of the License, or
8 //  (at your option) any later version.
9 //
10 //  This program is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU General Public License for more details.
14 //
15 //  You should have received a copy of the GNU General Public License
16 //  along with this program; if not, write to the Free Software
17 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 //  ---------------------------------------------------------------------------
19 
20 #include "sidfp.h"
21 #include <math.h>
22 
23 #define RINGSIZE 2048
24 
25 #ifdef __SSE__
26 #include <xmmintrin.h>
27 #endif
28 
29 /* tables used by voice/wavegen/envgen */
30 float dac[12];
31 float env_dac[256];
32 float wftable[11][4096];
33 
set_voice_nonlinearity(float nl)34 void SIDFP::set_voice_nonlinearity(float nl)
35 {
36   /* all voices, waves, etc. share the same tables */
37   voice[0].envelope.set_nonlinearity(nl);
38   voice[0].wave.set_nonlinearity(nl);
39   voice[0].wave.rebuild_wftable();
40   filter.set_nonlinearity(nl);
41 }
42 
kinked_dac(const int x,const float nonlinearity,const int max)43 float SIDFP::kinked_dac(const int x, const float nonlinearity, const int max)
44 {
45     float value = 0.f;
46 
47     int bit = 1;
48     float weight = 1.f;
49     const float dir = 2.0f * nonlinearity;
50     for (int i = 0; i < max; i ++) {
51         if (x & bit)
52             value += weight;
53         bit <<= 1;
54         weight *= dir;
55     }
56 
57     return value / (weight / nonlinearity / nonlinearity) * (1 << max);
58 }
59 
60 // ----------------------------------------------------------------------------
61 // Constructor.
62 // ----------------------------------------------------------------------------
SIDFP()63 SIDFP::SIDFP()
64 {
65   // Initialize pointers.
66   sample = 0;
67   fir = 0;
68   lastsample[0] = lastsample[1] = lastsample[2] = 0;
69   filtercyclegate = 0;
70 
71   set_sampling_parameters(985248, SAMPLE_INTERPOLATE, 44100);
72 
73   bus_value = 0;
74   bus_value_ttl = 0;
75 
76   input(0);
77 }
78 
79 
80 // ----------------------------------------------------------------------------
81 // Destructor.
82 // ----------------------------------------------------------------------------
~SIDFP()83 SIDFP::~SIDFP()
84 {
85   delete[] sample;
86   delete[] fir;
87 }
88 
89 // ----------------------------------------------------------------------------
90 // Set chip model.
91 // ----------------------------------------------------------------------------
set_chip_model(chip_model model)92 void SIDFP::set_chip_model(chip_model model)
93 {
94   for (int i = 0; i < 3; i++) {
95     voice[i].set_chip_model(model);
96   }
97   voice[0].wave.rebuild_wftable();
98 
99   filter.set_chip_model(model);
100 }
101 
102 // ----------------------------------------------------------------------------
103 // SID reset.
104 // ----------------------------------------------------------------------------
reset()105 void SIDFP::reset()
106 {
107   for (int i = 0; i < 3; i++) {
108     voice[i].reset();
109   }
110   filter.reset();
111   extfilt.reset();
112 
113   bus_value = 0;
114   bus_value_ttl = 0;
115 }
116 
117 
118 // ----------------------------------------------------------------------------
119 // Write 16-bit sample to audio input.
120 // NB! The caller is responsible for keeping the value within 16 bits.
121 // Note that to mix in an external audio signal, the signal should be
122 // resampled to 1MHz first to avoid sampling noise.
123 // ----------------------------------------------------------------------------
input(int sample)124 void SIDFP::input(int sample)
125 {
126   // Voice outputs are 20 bits. Scale up to match three voices in order
127   // to facilitate simulation of the MOS8580 "digi boost" hardware hack.
128   ext_in = static_cast<float>((sample << 4) * 3);
129 }
130 
131 // ----------------------------------------------------------------------------
132 // Read sample from audio output, 16-bit result.
133 // Do not use this directly, rather use the high-quality resampling outputs.
134 // ----------------------------------------------------------------------------
output()135 float SIDFP::output()
136 {
137   /* Scale to roughly -1 .. 1 range. Voices go from -2048 to 2048 or so,
138    * envelope from 0 to 255, there are 3 voices, and there's factor of 2
139    * for resonance. */
140   return extfilt.output() * (1.f / (2047.f * 255.f * 3.0f * 2.0f));
141 }
142 
143 // ----------------------------------------------------------------------------
144 // Read registers.
145 //
146 // Reading a write only register returns the last byte written to any SID
147 // register. The individual bits in this value start to fade down towards
148 // zero after a few cycles. All bits reach zero within approximately
149 // $2000 - $4000 cycles.
150 // It has been claimed that this fading happens in an orderly fashion, however
151 // sampling of write only registers reveals that this is not the case.
152 // NB! This is not correctly modeled.
153 // The actual use of write only registers has largely been made in the belief
154 // that all SID registers are readable. To support this belief the read
155 // would have to be done immediately after a write to the same register
156 // (remember that an intermediate write to another register would yield that
157 // value instead). With this in mind we return the last value written to
158 // any SID register for $2000 cycles without modeling the bit fading.
159 // ----------------------------------------------------------------------------
read(reg8 offset)160 reg8 SIDFP::read(reg8 offset)
161 {
162   switch (offset) {
163   case 0x19:
164     return potx.readPOT();
165   case 0x1a:
166     return poty.readPOT();
167   case 0x1b:
168     return voice[2].wave.readOSC(voice[0].wave);
169   case 0x1c:
170     return voice[2].envelope.readENV();
171   default:
172     return bus_value;
173   }
174 }
175 
176 
177 // ----------------------------------------------------------------------------
178 // Write registers.
179 // ----------------------------------------------------------------------------
write(reg8 offset,reg8 value)180 void SIDFP::write(reg8 offset, reg8 value)
181 {
182   bus_value = value;
183   bus_value_ttl = 34000;
184 
185   switch (offset) {
186   case 0x00:
187     voice[0].wave.writeFREQ_LO(value);
188     break;
189   case 0x01:
190     voice[0].wave.writeFREQ_HI(value);
191     break;
192   case 0x02:
193     voice[0].wave.writePW_LO(value);
194     break;
195   case 0x03:
196     voice[0].wave.writePW_HI(value);
197     break;
198   case 0x04:
199     voice[0].writeCONTROL_REG(voice[1].wave, value);
200     break;
201   case 0x05:
202     voice[0].envelope.writeATTACK_DECAY(value);
203     break;
204   case 0x06:
205     voice[0].envelope.writeSUSTAIN_RELEASE(value);
206     break;
207   case 0x07:
208     voice[1].wave.writeFREQ_LO(value);
209     break;
210   case 0x08:
211     voice[1].wave.writeFREQ_HI(value);
212     break;
213   case 0x09:
214     voice[1].wave.writePW_LO(value);
215     break;
216   case 0x0a:
217     voice[1].wave.writePW_HI(value);
218     break;
219   case 0x0b:
220     voice[1].writeCONTROL_REG(voice[2].wave, value);
221     break;
222   case 0x0c:
223     voice[1].envelope.writeATTACK_DECAY(value);
224     break;
225   case 0x0d:
226     voice[1].envelope.writeSUSTAIN_RELEASE(value);
227     break;
228   case 0x0e:
229     voice[2].wave.writeFREQ_LO(value);
230     break;
231   case 0x0f:
232     voice[2].wave.writeFREQ_HI(value);
233     break;
234   case 0x10:
235     voice[2].wave.writePW_LO(value);
236     break;
237   case 0x11:
238     voice[2].wave.writePW_HI(value);
239     break;
240   case 0x12:
241     voice[2].writeCONTROL_REG(voice[0].wave, value);
242     break;
243   case 0x13:
244     voice[2].envelope.writeATTACK_DECAY(value);
245     break;
246   case 0x14:
247     voice[2].envelope.writeSUSTAIN_RELEASE(value);
248     break;
249   case 0x15:
250     filter.writeFC_LO(value);
251     break;
252   case 0x16:
253     filter.writeFC_HI(value);
254     break;
255   case 0x17:
256     filter.writeRES_FILT(value);
257     break;
258   case 0x18:
259     filter.writeMODE_VOL(value);
260     break;
261   default:
262     break;
263   }
264 }
265 
266 
267 // ----------------------------------------------------------------------------
268 // SID voice muting.
269 // ----------------------------------------------------------------------------
mute(reg8 channel,bool enable)270 void SIDFP::mute(reg8 channel, bool enable)
271 {
272   // Only have 3 voices!
273   if (channel >= 3)
274     return;
275 
276   voice[channel].mute (enable);
277 }
278 
279 // ----------------------------------------------------------------------------
280 // Enable filter.
281 // ----------------------------------------------------------------------------
enable_filter(bool enable)282 void SIDFP::enable_filter(bool enable)
283 {
284   filter.enable_filter(enable);
285 }
286 
287 // ----------------------------------------------------------------------------
288 // I0() computes the 0th order modified Bessel function of the first kind.
289 // This function is originally from resample-1.5/filterkit.c by J. O. Smith.
290 // ----------------------------------------------------------------------------
I0(double x)291 double SIDFP::I0(double x)
292 {
293   // Max error acceptable in I0 could be 1e-6, which gives that 96 dB already.
294   // I'm overspecify these errors to get a beautiful FFT dump of the FIR.
295   const double I0e = 1e-10;
296 
297   double sum, u, halfx, temp;
298   int n;
299 
300   sum = u = n = 1;
301   halfx = x/2.0;
302 
303   do {
304     temp = halfx/n++;
305     u *= temp*temp;
306     sum += u;
307   } while (u >= I0e*sum);
308 
309   return sum;
310 }
311 
312 
313 // ----------------------------------------------------------------------------
314 // Setting of SID sampling parameters.
315 //
316 // Use a clock freqency of 985248Hz for PAL C64, 1022730Hz for NTSC C64.
317 // The default end of passband frequency is pass_freq = 0.9*sample_freq/2
318 // for sample frequencies up to ~ 44.1kHz, and 20kHz for higher sample
319 // frequencies.
320 //
321 // For resampling, the ratio between the clock frequency and the sample
322 // frequency is limited as follows:
323 //   125*clock_freq/sample_freq < 16384
324 // E.g. provided a clock frequency of ~ 1MHz, the sample frequency can not
325 // be set lower than ~ 8kHz. A lower sample frequency would make the
326 // resampling code overfill its 16k sample ring buffer.
327 //
328 // The end of passband frequency is also limited:
329 //   pass_freq <= 0.9*sample_freq/2
330 
331 // E.g. for a 44.1kHz sampling rate the end of passband frequency is limited
332 // to slightly below 20kHz. This constraint ensures that the FIR table is
333 // not overfilled.
334 // ----------------------------------------------------------------------------
set_sampling_parameters(double clock_freq,sampling_method method,double sample_freq,double pass_freq)335 bool SIDFP::set_sampling_parameters(double clock_freq, sampling_method method,
336 				  double sample_freq, double pass_freq)
337 {
338   filter.set_clock_frequency(static_cast<float>(clock_freq * 0.5));
339   extfilt.set_clock_frequency(static_cast<float>(clock_freq * 0.5));
340 
341   cycles_per_sample = static_cast<float>(clock_freq / sample_freq);
342 
343   // FIR initialization is only necessary for resampling.
344   if (method != SAMPLE_RESAMPLE_INTERPOLATE)
345   {
346     sampling = method;
347 
348     delete[] sample;
349     delete[] fir;
350     sample = 0;
351     fir = 0;
352     sample_prev = 0;
353     return true;
354   }
355 
356   sample_offset = 0;
357 
358   // Allocate sample buffer.
359   if (!sample)
360     sample = new float[RINGSIZE*2];
361 
362   // Clear sample buffer.
363   for (int j = 0; j < RINGSIZE*2; j++)
364     sample[j] = 0;
365   sample_index = 0;
366 
367   /* Up to 20 kHz or at most 90 % of passband if it's lower than 20 kHz. */
368   if (pass_freq > 20000)
369     pass_freq = 20000;
370   if (2*pass_freq/sample_freq > 0.9)
371     pass_freq = 0.9*sample_freq/2;
372 
373   // 16 bits -> -96dB stopband attenuation.
374   const double A = -20*log10(1.0/(1 << 16));
375 
376   // For calculation of beta and N see the reference for the kaiserord
377   // function in the MATLAB Signal Processing Toolbox:
378   // http://www.mathworks.com/access/helpdesk/help/toolbox/signal/kaiserord.html
379   const double beta = 0.1102*(A - 8.7);
380   const double I0beta = I0(beta);
381 
382   // Since we clock the filter at half the rate, we need to design the FIR
383   // with the reduced rate in mind.
384   double f_cycles_per_sample = (clock_freq * 0.5)/sample_freq;
385   double f_samples_per_cycle = sample_freq/(clock_freq * 0.5);
386 
387   double aliasing_allowance = sample_freq / 2 - 20000;
388   // no allowance to 20 kHz
389   if (aliasing_allowance < 0)
390     aliasing_allowance = 0;
391 
392   double transition_bandwidth = sample_freq/2 - pass_freq + aliasing_allowance;
393   {
394     // The filter order will maximally be 124 with the current constraints.
395     // N >= (96.33 - 7.95)/(2 * pi * 2.285 * (maxfreq - passbandfreq) >= 123
396     // The filter order is equal to the number of zero crossings, i.e.
397     // it should be an even number (sinc is symmetric about x = 0).
398     //
399     // XXX: analysis indicates that the filter is slighly overspecified by
400     //      there constraints. Need to check why. One possibility is the
401     //      level of audio being in truth closer to 15-bit than 16-bit.
402     int N = static_cast<int>((A - 7.95)/(2 * M_PI * 2.285 * transition_bandwidth/sample_freq) + 0.5);
403     N += N & 1;
404 
405     // The filter length is equal to the filter order + 1.
406     // The filter length must be an odd number (sinc is symmetric about x = 0).
407     fir_N = static_cast<int>(N*f_cycles_per_sample) + 1;
408     fir_N |= 1;
409 
410     // Check whether the sample ring buffer would overfill.
411     if (fir_N > RINGSIZE - 1)
412       return false;
413 
414     /* Error is bound by 1.234 / L^2, so for 16-bit: sqrt(1.234 * (1 << 16)) */
415     fir_RES = static_cast<int>(sqrt(1.234 * (1 << 16)) / f_cycles_per_sample + 0.5);
416   }
417   sampling = method;
418 
419   // Allocate memory for FIR tables.
420   delete[] fir;
421   fir = new float[fir_N*fir_RES];
422 
423   // The cutoff frequency is midway through the transition band
424   double wc = (pass_freq + transition_bandwidth/2) / sample_freq * M_PI * 2;
425 
426   // Calculate fir_RES FIR tables for linear interpolation.
427   for (int i = 0; i < fir_RES; i++) {
428     double j_offset = double(i)/fir_RES;
429     // Calculate FIR table. This is the sinc function, weighted by the
430     // Kaiser window.
431     for (int j = 0; j < fir_N; j ++) {
432       double jx = double(j) - fir_N/2.f - j_offset;
433       double wt = wc*jx/f_cycles_per_sample;
434       double temp = jx/(fir_N/2);
435       double Kaiser =
436 	fabs(temp) <= 1 ? I0(beta*sqrt(1 - temp*temp))/I0beta : 0;
437       // between 1e-7 and 1e-8 the FP result approximates to 1 due to FP limits
438       double sincwt =
439 	fabs(wt) >= 1e-8 ? sin(wt)/wt : 1;
440       fir[i * fir_N + j] = static_cast<float>(f_samples_per_cycle*wc/M_PI*sincwt*Kaiser);
441     }
442   }
443 
444   return true;
445 }
446 
447 inline
age_bus_value(cycle_count n)448 void SIDFP::age_bus_value(cycle_count n) {
449   // Age bus value. This is not supposed to be cycle exact,
450   // so it should be safe to approximate.
451   if (bus_value_ttl != 0) {
452     bus_value_ttl -= n;
453     if (bus_value_ttl <= 0) {
454 	bus_value = 0;
455 	bus_value_ttl = 0;
456     }
457   }
458 }
459 
460 // ----------------------------------------------------------------------------
461 // SID clocking - 1 cycle.
462 // ----------------------------------------------------------------------------
clock()463 void SIDFP::clock()
464 {
465   // Clock amplitude modulators.
466   for (int i = 0; i < 3; i++) {
467     voice[i].envelope.clock();
468     voice[i].wave.clock();
469   }
470 
471   voice[0].wave.synchronize(voice[1].wave, voice[2].wave);
472   voice[1].wave.synchronize(voice[2].wave, voice[0].wave);
473   voice[2].wave.synchronize(voice[0].wave, voice[1].wave);
474 
475   /* because the analog parts are relatively expensive and do not really need
476    * the precision of 1 MHz calculations, I average successive samples here to
477    * reduce the cpu drain for filter calculations and output resampling. */
478   float voicestate[3];
479   voicestate[0] = voice[0].output(voice[2].wave);
480   voicestate[1] = voice[1].output(voice[0].wave);
481   voicestate[2] = voice[2].output(voice[1].wave);
482 
483   /* for every second sample in sequence, clock filter */
484   if (filtercyclegate ++ & 1) {
485     extfilt.clock(filter.clock(
486       (lastsample[0] + voicestate[0]) * 0.5f,
487       (lastsample[1] + voicestate[1]) * 0.5f,
488       (lastsample[2] + voicestate[2]) * 0.5f,
489       ext_in
490     ));
491   }
492 
493   lastsample[0] = voicestate[0];
494   lastsample[1] = voicestate[1];
495   lastsample[2] = voicestate[2];
496 }
497 
498 // ----------------------------------------------------------------------------
499 // SID clocking with audio sampling.
500 //
501 // The example below shows how to clock the SID a specified amount of cycles
502 // while producing audio output:
503 //
504 // while (delta_t) {
505 //   bufindex += sid.clock(delta_t, buf + bufindex, buflength - bufindex);
506 //   write(dsp, buf, bufindex*2);
507 //   bufindex = 0;
508 // }
509 //
510 // ----------------------------------------------------------------------------
clock(cycle_count & delta_t,short * buf,int n,int interleave)511 int SIDFP::clock(cycle_count& delta_t, short* buf, int n, int interleave)
512 {
513   /* XXX I assume n is generally large enough for delta_t here... */
514   age_bus_value(delta_t);
515 
516 /* We can only control that SSE is really used for GCC */
517 #if defined(__SSE__) && defined(__GNUC__)
518   int old = _mm_getcsr();
519   _mm_setcsr(old | _MM_FLUSH_ZERO_ON);
520 #endif
521   int res;
522   switch (sampling) {
523   default:
524   case SAMPLE_INTERPOLATE:
525     res = clock_interpolate(delta_t, buf, n, interleave);
526     break;
527   case SAMPLE_RESAMPLE_INTERPOLATE:
528     res = clock_resample_interpolate(delta_t, buf, n, interleave);
529     break;
530   }
531 #if defined(__SSE__) && defined(__GNUC__)
532   _mm_setcsr(old);
533 #else
534   filter.nuke_denormals();
535   extfilt.nuke_denormals();
536 #endif
537 
538   return res;
539 }
540 
clock_fast(cycle_count & delta_t,short * buf,int n,int interleave)541 int SIDFP::clock_fast(cycle_count& delta_t, short* buf, int n, int interleave)
542 {
543   age_bus_value(delta_t);
544 #if defined(__SSE__) && defined(__GNUC__)
545   int old = _mm_getcsr();
546   _mm_setcsr(old | _MM_FLUSH_ZERO_ON);
547 #endif
548   int res = clock_interpolate(delta_t, buf, n, interleave);
549 #if defined(__SSE__) && defined(__GNUC__)
550   _mm_setcsr(old);
551 #else
552   filter.nuke_denormals();
553   extfilt.nuke_denormals();
554 #endif
555   return res;
556 }
557 
558 
559 // ----------------------------------------------------------------------------
560 // SID clocking with audio sampling - cycle based with linear sample
561 // interpolation.
562 //
563 // Here the chip is clocked every cycle. This yields higher quality
564 // sound since the samples are linearly interpolated, and since the
565 // external filter attenuates frequencies above 16kHz, thus reducing
566 // sampling noise.
567 // ----------------------------------------------------------------------------
568 inline
clock_interpolate(cycle_count & delta_t,short * buf,int n,int interleave)569 int SIDFP::clock_interpolate(cycle_count& delta_t, short* buf, int n,
570 			   int interleave)
571 {
572   int s = 0;
573   int i;
574 
575   for (;;) {
576     float next_sample_offset = sample_offset + cycles_per_sample;
577     int delta_t_sample = static_cast<int>(next_sample_offset);
578     if (delta_t_sample > delta_t) {
579       break;
580     }
581     if (s >= n) {
582       return s;
583     }
584     for (i = 0; i < delta_t_sample - 1; i++) {
585       clock();
586     }
587     if (i < delta_t_sample) {
588       sample_prev = output();
589       clock();
590     }
591 
592     delta_t -= delta_t_sample;
593     sample_offset = next_sample_offset - delta_t_sample;
594 
595     float sample_now = output();
596     int sample_int = (int)((sample_prev + (sample_offset * (sample_now - sample_prev))) * 32768.0f);
597     if (sample_int < -32768) sample_int = -32768;
598     if (sample_int > 32767) sample_int = 32767;
599     buf[s++ * interleave] = sample_int;
600 
601     sample_prev = sample_now;
602   }
603 
604   for (i = 0; i < delta_t - 1; i++) {
605     clock();
606   }
607   if (i < delta_t) {
608     sample_prev = output();
609     clock();
610   }
611   sample_offset -= delta_t;
612   delta_t = 0;
613   return s;
614 }
615 
convolve(const float * a,const float * b,int n)616 static float convolve(const float *a, const float *b, int n)
617 {
618     float out = 0.f;
619 #ifdef __SSE__
620     __m128 out4 = { 0, 0, 0, 0 };
621 
622     /* examine if we can use aligned loads on both pointers -- some x86-32/64
623      * hackery here ... should use uintptr_t, but that needs --std=C99... */
624     int diff = static_cast<int>(a - b) & 0xf;
625     int a_align = static_cast<int>(reinterpret_cast<long>(a)) & 0xf;
626 
627     /* advance if necessary. We can't let n fall < 0, so no while (n --). */
628     while (n > 0 && a_align != 0 && a_align != 16) {
629 	out += (*(a ++)) * (*(b ++));
630 	n --;
631 	a_align += 4;
632     }
633 
634     if (diff == 0) {
635         while (n >= 4) {
636             out4 = _mm_add_ps(out4, _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b)));
637             a += 4;
638             b += 4;
639             n -= 4;
640         }
641     } else {
642         /* loadu is difficult to optimize:
643          *
644          * - using load and movhl tricks to sync the halves was not noticeably
645          *   faster, less than 1 % and that might have been measurement error.
646          * - preparing copies of b for syncing with any alignment increased
647          *   memory pressure to the point that cache misses made it slower!
648          */
649         while (n >= 4) {
650             out4 = _mm_add_ps(out4, _mm_mul_ps(_mm_load_ps(a), _mm_loadu_ps(b)));
651             a += 4;
652             b += 4;
653             n -= 4;
654         }
655     }
656 
657     /* sum the upper half of values with the lower half, pairwise */
658     out4 = _mm_add_ps(_mm_movehl_ps(out4, out4), out4);
659     /* sum the value at slot 1 with the value at slot 0 */
660     out4 = _mm_add_ss(_mm_shuffle_ps(out4, out4, 1), out4);
661     float out_tmp;
662     /* save slot 0 to out_tmp, which is now 0+1+2+3 */
663     _mm_store_ss(&out_tmp, out4);
664     out += out_tmp;
665 #endif
666     while (n --)
667         out += (*(a ++)) * (*(b ++));
668 
669     return out;
670 }
671 
672 
673 // ----------------------------------------------------------------------------
674 // SID clocking with audio sampling - cycle based with audio resampling.
675 //
676 // This is the theoretically correct (and computationally intensive) audio
677 // sample generation. The samples are generated by resampling to the specified
678 // sampling frequency. The work rate is inversely proportional to the
679 // percentage of the bandwidth allocated to the filter transition band.
680 //
681 // This implementation is based on the paper "A Flexible Sampling-Rate
682 // Conversion Method", by J. O. Smith and P. Gosset, or rather on the
683 // expanded tutorial on the "Digital Audio Resampling Home Page":
684 // http://www-ccrma.stanford.edu/~jos/resample/
685 //
686 // By building shifted FIR tables with samples according to the
687 // sampling frequency, this implementation dramatically reduces the
688 // computational effort in the filter convolutions, without any loss
689 // of accuracy. The filter convolutions are also vectorizable on
690 // current hardware.
691 //
692 // Further possible optimizations are:
693 // * An equiripple filter design could yield a lower filter order, see
694 //   http://www.mwrf.com/Articles/ArticleID/7229/7229.html
695 // * The Convolution Theorem could be used to bring the complexity of
696 //   convolution down from O(n*n) to O(n*log(n)) using the Fast Fourier
697 //   Transform, see http://en.wikipedia.org/wiki/Convolution_theorem
698 // * Simply resampling in two steps can also yield computational
699 //   savings, since the transition band will be wider in the first step
700 //   and the required filter order is thus lower in this step.
701 //   Laurent Ganier has found the optimal intermediate sampling frequency
702 //   to be (via derivation of sum of two steps):
703 //     2 * pass_freq + sqrt [ 2 * pass_freq * orig_sample_freq
704 //       * (dest_sample_freq - 2 * pass_freq) / dest_sample_freq ]
705 //
706 // NB! the result of right shifting negative numbers is really
707 // implementation dependent in the C++ standard.
708 // ----------------------------------------------------------------------------
709 inline
clock_resample_interpolate(cycle_count & delta_t,short * buf,int n,int interleave)710 int SIDFP::clock_resample_interpolate(cycle_count& delta_t, short* buf, int n,
711                                     int interleave)
712 {
713   int s = 0;
714 
715   for (;;) {
716     float next_sample_offset = sample_offset + cycles_per_sample;
717     /* full clocks left to next sample */
718     int delta_t_sample = static_cast<int>(next_sample_offset);
719     if (delta_t_sample > delta_t || s >= n)
720       break;
721 
722     /* clock forward delta_t_sample samples */
723     for (int i = 0; i < delta_t_sample; i++) {
724       clock();
725       if (filtercyclegate & 1) {
726         sample[sample_index] = sample[sample_index + RINGSIZE] = output();
727         ++ sample_index;
728         sample_index &= RINGSIZE - 1;
729       }
730     }
731     delta_t -= delta_t_sample;
732 
733     /* Phase of the sample in terms of clock, [0 .. 1[. */
734     sample_offset = next_sample_offset - static_cast<float>(delta_t_sample);
735 
736     /* find the first of the nearest fir tables close to the phase */
737     float fir_offset_rmd = sample_offset * fir_RES;
738     int fir_offset = static_cast<int>(fir_offset_rmd);
739     /* [0 .. 1[ */
740     fir_offset_rmd -= static_cast<float>(fir_offset);
741 
742     /* find fir_N most recent samples, plus one extra in case the FIR wraps. */
743     float* sample_start = sample + sample_index - fir_N + RINGSIZE - 1;
744 
745     float v1 = convolve(sample_start, fir + fir_offset * fir_N, fir_N);
746     // Use next FIR table, wrap around to first FIR table using
747     // the next sample.
748     if (++ fir_offset == fir_RES) {
749       fir_offset = 0;
750       ++ sample_start;
751     }
752     float v2 = convolve(sample_start, fir + fir_offset * fir_N, fir_N);
753 
754     // Linear interpolation between the sinc tables yields good approximation
755     // for the exact value.
756 
757     int sample_int = (int)((v1 + fir_offset_rmd * (v2 - v1)) * 32768.0f);
758     if (sample_int < -32768) sample_int = -32768;
759     if (sample_int > 32767) sample_int = 32767;
760     buf[s++ * interleave] = sample_int;
761   }
762 
763   /* clock forward delta_t samples */
764   for (int i = 0; i < delta_t; i++) {
765     clock();
766     if (filtercyclegate & 1) {
767       sample[sample_index] = sample[sample_index + RINGSIZE] = output();
768       ++ sample_index;
769       sample_index &= RINGSIZE - 1;
770     }
771   }
772   sample_offset -= static_cast<float>(delta_t);
773   delta_t = 0;
774   return s;
775 }
776