1 //  ---------------------------------------------------------------------------
2 //  This file is part of reSID, a MOS6581 SID emulator engine.
3 //  Copyright (C) 2004  Dag Lem <resid@nimrod.no>
4 //
5 //  This program is free software; you can redistribute it and/or modify
6 //  it under the terms of the GNU General Public License as published by
7 //  the Free Software Foundation; either version 2 of the License, or
8 //  (at your option) any later version.
9 //
10 //  This program is distributed in the hope that it will be useful,
11 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //  GNU General Public License for more details.
14 //
15 //  You should have received a copy of the GNU General Public License
16 //  along with this program; if not, write to the Free Software
17 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 //  ---------------------------------------------------------------------------
19 
20 #include "sid.h"
21 #include <stdio.h>
22 #include <math.h>
23 
24 extern float convolve(const float *a, const float *b, int n);
25 extern float convolve_sse(const float *a, const float *b, int n);
26 
27 enum host_cpu_feature {
28     HOST_CPU_MMX=1, HOST_CPU_SSE=2, HOST_CPU_SSE2=4, HOST_CPU_SSE3=8
29 };
30 
31 /* This code is appropriate for 32-bit and 64-bit x86 CPUs. */
32 #if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER)
33 
34 struct cpu_x86_regs_s {
35   unsigned int eax;
36   unsigned int ebx;
37   unsigned int ecx;
38   unsigned int edx;
39 };
40 typedef struct cpu_x86_regs_s cpu_x86_regs_t;
41 
get_cpuid_regs(unsigned int index)42 static cpu_x86_regs_t get_cpuid_regs(unsigned int index)
43 {
44   cpu_x86_regs_t retval;
45 
46 #if defined(_MSC_VER) /* MSVC assembly */
47   __asm {
48     mov eax, [index]
49     cpuid
50     mov [retval.eax], eax
51     mov [retval.ebx], ebx
52     mov [retval.ecx], ecx
53     mov [retval.edx], edx
54   }
55 #else /* GNU assembly */
56   asm("movl %1, %%eax; cpuid; movl %%eax, %0;"
57       : "=m" (retval.eax)
58       : "r"  (index)
59       : "eax", "ebx", "ecx", "edx");
60   asm("movl %1, %%eax; cpuid; movl %%ebx, %0;"
61       : "=m" (retval.ebx)
62       : "r"  (index)
63       : "eax", "ebx", "ecx", "edx");
64   asm("movl %1, %%eax; cpuid; movl %%ecx, %0;"
65       : "=m" (retval.ecx)
66       : "r"  (index)
67       : "eax", "ebx", "ecx", "edx");
68   asm("movl %1, %%eax; cpuid; movl %%edx, %0;"
69       : "=m" (retval.edx)
70       : "r"  (index)
71       : "eax", "ebx", "ecx", "edx");
72 #endif
73 
74   return retval;
75 }
76 
host_cpu_features_by_cpuid(void)77 static int host_cpu_features_by_cpuid(void)
78 {
79   cpu_x86_regs_t regs = get_cpuid_regs(1);
80 
81   int features = 0;
82   if (regs.edx & (1 << 23))
83     features |= HOST_CPU_MMX;
84   if (regs.edx & (1 << 25))
85     features |= HOST_CPU_SSE;
86   if (regs.edx & (1 << 26))
87     features |= HOST_CPU_SSE2;
88   if (regs.ecx & (1 << 0))
89     features |= HOST_CPU_SSE3;
90 
91   return features;
92 }
93 
host_cpu_features(void)94 static int host_cpu_features(void)
95 {
96   static int features = 0;
97   static int features_detected = 0;
98 /* 32-bit only */
99 #if defined(__i386__) || (defined(_MSC_VER) && defined(_WIN32))
100   unsigned long temp1, temp2;
101 #endif
102 
103   if (features_detected)
104     return features;
105   features_detected = 1;
106 
107 #if defined(_MSC_VER) && defined(_WIN32) /* MSVC compatible assembly appropriate for 32-bit Windows */
108   /* see if we are dealing with a cpu that has the cpuid instruction */
109   __asm {
110     pushf
111     pop eax
112     mov [temp1], eax
113     xor eax, 0x200000
114     push eax
115     popf
116     pushf
117     pop eax
118     mov [temp2], eax
119     push [temp1]
120     popf
121   }
122 #endif
123 #if defined(__i386__) /* GNU assembly */
124   asm("pushfl; popl %%eax; movl %%eax, %0; xorl $0x200000, %%eax; pushl %%eax; popfl; pushfl; popl %%eax; movl %%eax, %1; pushl %0; popfl "
125       : "=r" (temp1),
126       "=r" (temp2)
127       :
128       : "eax");
129 #endif
130 #if defined(__i386__) || (defined(_MSC_VER) && defined(_WIN32))
131   temp1 &= 0x200000;
132   temp2 &= 0x200000;
133   if (temp1 == temp2) {
134     /* no cpuid support, so we can't test for SSE availability -> false */
135     return 0;
136   }
137 #endif
138 
139   /* find the highest supported cpuid function, returned in %eax */
140   if (get_cpuid_regs(0).eax < 1) {
141     /* no cpuid 1 function, we can't test for features -> no features */
142     return 0;
143   }
144 
145   features = host_cpu_features_by_cpuid();
146   return features;
147 }
148 
149 #else /* !__x86_64__ && !__i386__ && !_MSC_VER */
host_cpu_features(void)150 static int host_cpu_features(void)
151 {
152   return 0;
153 }
154 #endif
155 
kinked_dac(const int x,const float nonlinearity,const int max)156 float SIDFP::kinked_dac(const int x, const float nonlinearity, const int max)
157 {
158     float value = 0.f;
159 
160     int bit = 1;
161     float weight = 1.f;
162     const float dir = 2.0f * nonlinearity;
163     for (int i = 0; i < max; i ++) {
164         if (x & bit)
165             value += weight;
166         bit <<= 1;
167         weight *= dir;
168     }
169 
170     return value / (weight / nonlinearity) * (1 << max);
171 }
172 
173 // ----------------------------------------------------------------------------
174 // Constructor.
175 // ----------------------------------------------------------------------------
SIDFP()176 SIDFP::SIDFP()
177 {
178 #if (RESID_USE_SSE==1)
179   can_use_sse = (host_cpu_features() & HOST_CPU_SSE) != 0;
180 #else
181   can_use_sse = false;
182 #endif
183 
184   // Initialize pointers.
185   sample = 0;
186   fir = 0;
187 
188   voice[0].set_sync_source(&voice[2]);
189   voice[1].set_sync_source(&voice[0]);
190   voice[2].set_sync_source(&voice[1]);
191 
192   set_sampling_parameters(985248, SAMPLE_INTERPOLATE, 44100);
193 
194   bus_value = 0;
195   bus_value_ttl = 0;
196 
197   input(0);
198 }
199 
200 
201 // ----------------------------------------------------------------------------
202 // Destructor.
203 // ----------------------------------------------------------------------------
~SIDFP()204 SIDFP::~SIDFP()
205 {
206   delete[] sample;
207   delete[] fir;
208 }
209 
210 
211 // ----------------------------------------------------------------------------
212 // Set chip model.
213 // ----------------------------------------------------------------------------
set_chip_model(chip_model model)214 void SIDFP::set_chip_model(chip_model model)
215 {
216   for (int i = 0; i < 3; i++) {
217     voice[i].set_chip_model(model);
218   }
219 
220   filter.set_chip_model(model);
221   extfilt.set_chip_model(model);
222 }
223 
224 /* nonlinear DAC support, set 1 for 8580 / no effect, about 0.96 otherwise */
set_voice_nonlinearity(float nl)225 void SIDFP::set_voice_nonlinearity(float nl)
226 {
227   for (int i = 0; i < 3; i++) {
228     voice[i].set_nonlinearity(nl);
229   }
230 }
231 
232 // ----------------------------------------------------------------------------
233 // SID reset.
234 // ----------------------------------------------------------------------------
reset()235 void SIDFP::reset()
236 {
237   for (int i = 0; i < 3; i++) {
238     voice[i].reset();
239   }
240   filter.reset();
241   extfilt.reset();
242 
243   bus_value = 0;
244   bus_value_ttl = 0;
245 }
246 
247 
248 // ----------------------------------------------------------------------------
249 // Write 16-bit sample to audio input.
250 // NB! The caller is responsible for keeping the value within 16 bits.
251 // Note that to mix in an external audio signal, the signal should be
252 // resampled to 1MHz first to avoid sampling noise.
253 // ----------------------------------------------------------------------------
input(int sample)254 void SIDFP::input(int sample)
255 {
256   // Voice outputs are 20 bits. Scale up to match three voices in order
257   // to facilitate simulation of the MOS8580 "digi boost" hardware hack.
258   ext_in = (float) ( (sample << 4) * 3 );
259 }
260 
output()261 float SIDFP::output()
262 {
263   const float range = 1 << 15;
264   return extfilt.output() / (4095.f * 255.f * 3.f * 1.5f / range);
265 }
266 
267 // ----------------------------------------------------------------------------
268 // Read registers.
269 //
270 // Reading a write only register returns the last byte written to any SID
271 // register. The individual bits in this value start to fade down towards
272 // zero after a few cycles. All bits reach zero within approximately
273 // $2000 - $4000 cycles.
274 // It has been claimed that this fading happens in an orderly fashion, however
275 // sampling of write only registers reveals that this is not the case.
276 // NB! This is not correctly modeled.
277 // The actual use of write only registers has largely been made in the belief
278 // that all SID registers are readable. To support this belief the read
279 // would have to be done immediately after a write to the same register
280 // (remember that an intermediate write to another register would yield that
281 // value instead). With this in mind we return the last value written to
282 // any SID register for $2000 cycles without modeling the bit fading.
283 // ----------------------------------------------------------------------------
read(reg8 offset)284 reg8 SIDFP::read(reg8 offset)
285 {
286   switch (offset) {
287   case 0x19:
288     return potx.readPOT();
289   case 0x1a:
290     return poty.readPOT();
291   case 0x1b:
292     return voice[2].wave.readOSC();
293   case 0x1c:
294     return voice[2].envelope.readENV();
295   default:
296     return bus_value;
297   }
298 }
299 
300 
301 // ----------------------------------------------------------------------------
302 // Write registers.
303 // ----------------------------------------------------------------------------
write(reg8 offset,reg8 value)304 void SIDFP::write(reg8 offset, reg8 value)
305 {
306   bus_value = value;
307   bus_value_ttl = 0x4000;
308 
309   switch (offset) {
310   case 0x00:
311     voice[0].wave.writeFREQ_LO(value);
312     break;
313   case 0x01:
314     voice[0].wave.writeFREQ_HI(value);
315     break;
316   case 0x02:
317     voice[0].wave.writePW_LO(value);
318     break;
319   case 0x03:
320     voice[0].wave.writePW_HI(value);
321     break;
322   case 0x04:
323     voice[0].writeCONTROL_REG(value);
324     break;
325   case 0x05:
326     voice[0].envelope.writeATTACK_DECAY(value);
327     break;
328   case 0x06:
329     voice[0].envelope.writeSUSTAIN_RELEASE(value);
330     break;
331   case 0x07:
332     voice[1].wave.writeFREQ_LO(value);
333     break;
334   case 0x08:
335     voice[1].wave.writeFREQ_HI(value);
336     break;
337   case 0x09:
338     voice[1].wave.writePW_LO(value);
339     break;
340   case 0x0a:
341     voice[1].wave.writePW_HI(value);
342     break;
343   case 0x0b:
344     voice[1].writeCONTROL_REG(value);
345     break;
346   case 0x0c:
347     voice[1].envelope.writeATTACK_DECAY(value);
348     break;
349   case 0x0d:
350     voice[1].envelope.writeSUSTAIN_RELEASE(value);
351     break;
352   case 0x0e:
353     voice[2].wave.writeFREQ_LO(value);
354     break;
355   case 0x0f:
356     voice[2].wave.writeFREQ_HI(value);
357     break;
358   case 0x10:
359     voice[2].wave.writePW_LO(value);
360     break;
361   case 0x11:
362     voice[2].wave.writePW_HI(value);
363     break;
364   case 0x12:
365     voice[2].writeCONTROL_REG(value);
366     break;
367   case 0x13:
368     voice[2].envelope.writeATTACK_DECAY(value);
369     break;
370   case 0x14:
371     voice[2].envelope.writeSUSTAIN_RELEASE(value);
372     break;
373   case 0x15:
374     filter.writeFC_LO(value);
375     break;
376   case 0x16:
377     filter.writeFC_HI(value);
378     break;
379   case 0x17:
380     filter.writeRES_FILT(value);
381     break;
382   case 0x18:
383     filter.writeMODE_VOL(value);
384     break;
385   default:
386     break;
387   }
388 }
389 
390 
391 // ----------------------------------------------------------------------------
392 // Constructor.
393 // ----------------------------------------------------------------------------
State()394 SIDFP::State::State()
395 {
396   int i;
397 
398   for (i = 0; i < 0x20; i++) {
399     sid_register[i] = 0;
400   }
401 
402   bus_value = 0;
403   bus_value_ttl = 0;
404 
405   for (i = 0; i < 3; i++) {
406     accumulator[i] = 0;
407     shift_register[i] = 0x7ffff8;
408     rate_counter[i] = 0;
409     rate_counter_period[i] = 9;
410     exponential_counter[i] = 0;
411     exponential_counter_period[i] = 1;
412     envelope_counter[i] = 0;
413     envelope_state[i] = EnvelopeGeneratorFP::RELEASE;
414     hold_zero[i] = true;
415   }
416 }
417 
418 
419 // ----------------------------------------------------------------------------
420 // Read state.
421 // ----------------------------------------------------------------------------
read_state()422 SIDFP::State SIDFP::read_state()
423 {
424   State state;
425   int i, j;
426 
427   for (i = 0, j = 0; i < 3; i++, j += 7) {
428     WaveformGeneratorFP& wave = voice[i].wave;
429     EnvelopeGeneratorFP& envelope = voice[i].envelope;
430     state.sid_register[j + 0] = wave.freq & 0xff;
431     state.sid_register[j + 1] = wave.freq >> 8;
432     state.sid_register[j + 2] = wave.pw & 0xff;
433     state.sid_register[j + 3] = wave.pw >> 8;
434     state.sid_register[j + 4] =
435       (wave.waveform << 4)
436       | (wave.test ? 0x08 : 0)
437       | (wave.ring_mod ? 0x04 : 0)
438       | (wave.sync ? 0x02 : 0)
439       | (envelope.gate ? 0x01 : 0);
440     state.sid_register[j + 5] = (envelope.attack << 4) | envelope.decay;
441     state.sid_register[j + 6] = (envelope.sustain << 4) | envelope.release;
442   }
443 
444   state.sid_register[j++] = filter.fc & 0x007;
445   state.sid_register[j++] = filter.fc >> 3;
446   state.sid_register[j++] = (filter.res << 4) | filter.filt;
447   state.sid_register[j++] =
448     (filter.voice3off ? 0x80 : 0)
449     | (filter.hp_bp_lp << 4)
450     | filter.vol;
451 
452   // These registers are superfluous, but included for completeness.
453   for (; j < 0x1d; j++) {
454     state.sid_register[j] = read(j);
455   }
456   for (; j < 0x20; j++) {
457     state.sid_register[j] = 0;
458   }
459 
460   state.bus_value = bus_value;
461   state.bus_value_ttl = bus_value_ttl;
462 
463   for (i = 0; i < 3; i++) {
464     state.accumulator[i] = voice[i].wave.accumulator;
465     state.shift_register[i] = voice[i].wave.shift_register;
466     state.rate_counter[i] = voice[i].envelope.rate_counter;
467     state.rate_counter_period[i] = voice[i].envelope.rate_period;
468     state.exponential_counter[i] = voice[i].envelope.exponential_counter;
469     state.exponential_counter_period[i] = voice[i].envelope.exponential_counter_period;
470     state.envelope_counter[i] = voice[i].envelope.envelope_counter;
471     state.envelope_state[i] = voice[i].envelope.state;
472     state.hold_zero[i] = voice[i].envelope.hold_zero;
473   }
474 
475   return state;
476 }
477 
478 
479 // ----------------------------------------------------------------------------
480 // Write state.
481 // ----------------------------------------------------------------------------
write_state(const State & state)482 void SIDFP::write_state(const State& state)
483 {
484   int i;
485 
486   for (i = 0; i <= 0x18; i++) {
487     write(i, state.sid_register[i]);
488   }
489 
490   bus_value = state.bus_value;
491   bus_value_ttl = state.bus_value_ttl;
492 
493   for (i = 0; i < 3; i++) {
494     voice[i].wave.accumulator = state.accumulator[i];
495     voice[i].wave.shift_register = state.shift_register[i];
496     voice[i].envelope.rate_counter = state.rate_counter[i];
497     voice[i].envelope.rate_period = state.rate_counter_period[i];
498     voice[i].envelope.exponential_counter = state.exponential_counter[i];
499     voice[i].envelope.exponential_counter_period = state.exponential_counter_period[i];
500     voice[i].envelope.envelope_counter = state.envelope_counter[i];
501     voice[i].envelope.state = state.envelope_state[i];
502     voice[i].envelope.hold_zero = state.hold_zero[i];
503   }
504 }
505 
506 
507 // ----------------------------------------------------------------------------
508 // Enable filter.
509 // ----------------------------------------------------------------------------
enable_filter(bool enable)510 void SIDFP::enable_filter(bool enable)
511 {
512   filter.enable_filter(enable);
513 }
514 
515 
516 // ----------------------------------------------------------------------------
517 // Enable external filter.
518 // ----------------------------------------------------------------------------
enable_external_filter(bool enable)519 void SIDFP::enable_external_filter(bool enable)
520 {
521   extfilt.enable_filter(enable);
522 }
523 
524 
525 // ----------------------------------------------------------------------------
526 // I0() computes the 0th order modified Bessel function of the first kind.
527 // This function is originally from resample-1.5/filterkit.c by J. O. Smith.
528 // ----------------------------------------------------------------------------
I0(double x)529 double SIDFP::I0(double x)
530 {
531   // Max error acceptable in I0 could be 1e-6, which gives that 96 dB already.
532   // I'm overspecify these errors to get a beautiful FFT dump of the FIR.
533   const double I0e = 1e-10;
534 
535   double sum, u, halfx, temp;
536   int n;
537 
538   sum = u = n = 1;
539   halfx = x/2.0;
540 
541   do {
542     temp = halfx/n++;
543     u *= temp*temp;
544     sum += u;
545   } while (u >= I0e*sum);
546 
547   return sum;
548 }
549 
550 
551 // ----------------------------------------------------------------------------
552 // Setting of SID sampling parameters.
553 //
554 // Use a clock freqency of 985248Hz for PAL C64, 1022730Hz for NTSC C64.
555 // The default end of passband frequency is pass_freq = 0.9*sample_freq/2
556 // for sample frequencies up to ~ 44.1kHz, and 20kHz for higher sample
557 // frequencies.
558 //
559 // For resampling, the ratio between the clock frequency and the sample
560 // frequency is limited as follows:
561 //   125*clock_freq/sample_freq < 16384
562 // E.g. provided a clock frequency of ~ 1MHz, the sample frequency can not
563 // be set lower than ~ 8kHz. A lower sample frequency would make the
564 // resampling code overfill its 16k sample ring buffer.
565 //
566 // The end of passband frequency is also limited:
567 //   pass_freq <= 0.9*sample_freq/2
568 
569 // E.g. for a 44.1kHz sampling rate the end of passband frequency is limited
570 // to slightly below 20kHz. This constraint ensures that the FIR table is
571 // not overfilled.
572 // ----------------------------------------------------------------------------
set_sampling_parameters(float clock_freq,sampling_method method,float sample_freq,float pass_freq)573 bool SIDFP::set_sampling_parameters(float clock_freq, sampling_method method,
574                                   float sample_freq, float pass_freq)
575 {
576   clock_frequency = clock_freq;
577   sampling = method;
578 
579   filter.set_clock_frequency(clock_freq);
580   extfilt.set_clock_frequency(clock_freq);
581   adjust_sampling_frequency(sample_freq);
582 
583   sample_offset = 0;
584   sample_prev = 0;
585 
586   // FIR initialization is only necessary for resampling.
587   if (method != SAMPLE_RESAMPLE_INTERPOLATE)
588   {
589     delete[] sample;
590     delete[] fir;
591     sample = 0;
592     fir = 0;
593     return true;
594   }
595 
596   const int bits = 16;
597 
598   if (pass_freq > 20000)
599     pass_freq = 20000;
600   if (2*pass_freq/sample_freq > 0.9)
601     pass_freq = 0.9f*sample_freq/2;
602 
603   // 16 bits -> -96dB stopband attenuation.
604   const double A = -20*log10(1.0/(1 << bits));
605 
606   // For calculation of beta and N see the reference for the kaiserord
607   // function in the MATLAB Signal Processing Toolbox:
608   // http://www.mathworks.com/access/helpdesk/help/toolbox/signal/kaiserord.html
609   const double beta = 0.1102*(A - 8.7);
610   const double I0beta = I0(beta);
611 
612   double f_samples_per_cycle = sample_freq/clock_freq;
613   double f_cycles_per_sample = clock_freq/sample_freq;
614 
615   /* This code utilizes the fact that aliasing back to 20 kHz from
616    * sample_freq/2 is inaudible. This allows us to define a passband
617    * wider than normally. We might also consider aliasing back to pass_freq,
618    * but as this can be less than 20 kHz, it might become audible... */
619   double aliasing_allowance = sample_freq / 2 - 20000;
620   if (aliasing_allowance < 0)
621     aliasing_allowance = 0;
622 
623   double transition_bandwidth = sample_freq/2 - pass_freq + aliasing_allowance;
624   {
625     /* Filter order according to Kaiser's paper. */
626 
627     int N = (int) ((A - 7.95)/(2 * M_PI * 2.285 * transition_bandwidth/sample_freq) + 0.5);
628     N += N & 1;
629 
630     // The filter length is equal to the filter order + 1.
631     // The filter length must be an odd number (sinc is symmetric about x = 0).
632     fir_N = int(N*f_cycles_per_sample) + 1;
633     fir_N |= 1;
634 
635     // Check whether the sample ring buffer would overfill.
636     if (fir_N > RINGSIZE - 1)
637       return false;
638 
639     /* Error is bound by 1.234 / L^2 */
640     fir_RES = (int) (sqrt(1.234 * (1 << bits)) / f_cycles_per_sample + 0.5);
641   }
642 
643   // Allocate memory for FIR tables.
644   delete[] fir;
645   fir = new float[fir_N*fir_RES];
646 
647   // The cutoff frequency is midway through the transition band.
648   double wc = (pass_freq + transition_bandwidth/2) / sample_freq * M_PI * 2;
649 
650   // Calculate fir_RES FIR tables for linear interpolation.
651   for (int i = 0; i < fir_RES; i++) {
652     double j_offset = double(i)/fir_RES;
653     // Calculate FIR table. This is the sinc function, weighted by the
654     // Kaiser window.
655     for (int j = 0; j < fir_N; j ++) {
656       double jx = j - fir_N/2. - j_offset;
657       double wt = wc*jx/f_cycles_per_sample;
658       double temp = jx/(fir_N/2);
659       double Kaiser =
660         fabs(temp) <= 1 ? I0(beta*sqrt(1 - temp*temp))/I0beta : 0;
661       double sincwt =
662         fabs(wt) >= 1e-8 ? sin(wt)/wt : 1;
663       fir[i * fir_N + j] = (float) (f_samples_per_cycle*wc/M_PI*sincwt*Kaiser);
664     }
665   }
666 
667   // Allocate sample buffer.
668   if (!sample) {
669     sample = new float[RINGSIZE*2];
670   }
671   // Clear sample buffer.
672   for (int j = 0; j < RINGSIZE*2; j++) {
673     sample[j] = 0;
674   }
675   sample_index = 0;
676 
677   return true;
678 }
679 
680 // ----------------------------------------------------------------------------
681 // Adjustment of SID sampling frequency.
682 //
683 // In some applications, e.g. a C64 emulator, it can be desirable to
684 // synchronize sound with a timer source. This is supported by adjustment of
685 // the SID sampling frequency.
686 //
687 // NB! Adjustment of the sampling frequency may lead to noticeable shifts in
688 // frequency, and should only be used for interactive applications. Note also
689 // that any adjustment of the sampling frequency will change the
690 // characteristics of the resampling filter, since the filter is not rebuilt.
691 // ----------------------------------------------------------------------------
adjust_sampling_frequency(float sample_freq)692 void SIDFP::adjust_sampling_frequency(float sample_freq)
693 {
694   cycles_per_sample = clock_frequency/sample_freq;
695 }
696 
age_bus_value(cycle_count n)697 void SIDFP::age_bus_value(cycle_count n) {
698   if (bus_value_ttl != 0) {
699     bus_value_ttl -= n;
700     if (bus_value_ttl <= 0) {
701         bus_value = 0;
702         bus_value_ttl = 0;
703     }
704   }
705 }
706 
707 // ----------------------------------------------------------------------------
708 // SID clocking - 1 cycle.
709 // ----------------------------------------------------------------------------
clock()710 void SIDFP::clock()
711 {
712   int i;
713 
714   // Clock amplitude modulators.
715   for (i = 0; i < 3; i++) {
716     voice[i].envelope.clock();
717   }
718 
719   // Clock oscillators.
720   for (i = 0; i < 3; i++) {
721     voice[i].wave.clock();
722   }
723 
724   // Synchronize oscillators.
725   for (i = 0; i < 3; i++) {
726     voice[i].wave.synchronize();
727   }
728 
729   // Clock filter.
730   extfilt.clock(filter.clock(voice[0].output(), voice[1].output(), voice[2].output(), ext_in));
731 }
732 
733 // ----------------------------------------------------------------------------
734 // SID clocking with audio sampling.
735 // Fixpoint arithmetics is used.
736 //
737 // The example below shows how to clock the SID a specified amount of cycles
738 // while producing audio output:
739 //
740 // while (delta_t) {
741 //   bufindex += sid.clock(delta_t, buf + bufindex, buflength - bufindex);
742 //   write(dsp, buf, bufindex*2);
743 //   bufindex = 0;
744 // }
745 //
746 // ----------------------------------------------------------------------------
clock(cycle_count & delta_t,short * buf,int n,int interleave)747 int SIDFP::clock(cycle_count& delta_t, short* buf, int n, int interleave)
748 {
749   /* XXX I assume n is generally large enough for delta_t here... */
750   age_bus_value(delta_t);
751   int res;
752   switch (sampling) {
753   default:
754   case SAMPLE_INTERPOLATE:
755     res = clock_interpolate(delta_t, buf, n, interleave);
756     break;
757   case SAMPLE_RESAMPLE_INTERPOLATE:
758     res = clock_resample_interpolate(delta_t, buf, n, interleave);
759     break;
760   }
761 
762   filter.nuke_denormals();
763   extfilt.nuke_denormals();
764 
765   return res;
766 }
767 
768 // ----------------------------------------------------------------------------
769 // SID clocking with audio sampling - cycle based with linear sample
770 // interpolation.
771 //
772 // Here the chip is clocked every cycle. This yields higher quality
773 // sound since the samples are linearly interpolated, and since the
774 // external filter attenuates frequencies above 16kHz, thus reducing
775 // sampling noise.
776 // ----------------------------------------------------------------------------
777 RESID_INLINE
clock_interpolate(cycle_count & delta_t,short * buf,int n,int interleave)778 int SIDFP::clock_interpolate(cycle_count& delta_t, short* buf, int n,
779                            int interleave)
780 {
781   int s = 0;
782   int i;
783 
784   for (;;) {
785     float next_sample_offset = sample_offset + cycles_per_sample;
786     int delta_t_sample = (int) next_sample_offset;
787     if (delta_t_sample > delta_t) {
788       break;
789     }
790     if (s >= n) {
791       return s;
792     }
793     for (i = 0; i < delta_t_sample - 1; i++) {
794       clock();
795     }
796     if (i < delta_t_sample) {
797       sample_prev = output();
798       clock();
799     }
800 
801     delta_t -= delta_t_sample;
802     sample_offset = next_sample_offset - delta_t_sample;
803 
804     float sample_now = output();
805     int v = (int)(sample_prev + (sample_offset * (sample_now - sample_prev)));
806     // Saturated arithmetics to guard against 16 bit sample overflow.
807     const int half = 1 << 15;
808     if (v >= half) {
809       v = half - 1;
810     }
811     else if (v < -half) {
812       v = -half;
813     }
814     buf[s++*interleave] = v;
815     sample_prev = sample_now;
816   }
817 
818   for (i = 0; i < delta_t - 1; i++) {
819     clock();
820   }
821   if (i < delta_t) {
822     sample_prev = output();
823     clock();
824   }
825   sample_offset -= delta_t;
826   delta_t = 0;
827   return s;
828 }
829 
830 // ----------------------------------------------------------------------------
831 // SID clocking with audio sampling - cycle based with audio resampling.
832 //
833 // This is the theoretically correct (and computationally intensive) audio
834 // sample generation. The samples are generated by resampling to the specified
835 // sampling frequency. The work rate is inversely proportional to the
836 // percentage of the bandwidth allocated to the filter transition band.
837 //
838 // This implementation is based on the paper "A Flexible Sampling-Rate
839 // Conversion Method", by J. O. Smith and P. Gosset, or rather on the
840 // expanded tutorial on the "Digital Audio Resampling Home Page":
841 // http://www-ccrma.stanford.edu/~jos/resample/
842 //
843 // By building shifted FIR tables with samples according to the
844 // sampling frequency, this implementation dramatically reduces the
845 // computational effort in the filter convolutions, without any loss
846 // of accuracy. The filter convolutions are also vectorizable on
847 // current hardware.
848 //
849 // Further possible optimizations are:
850 // * An equiripple filter design could yield a lower filter order, see
851 //   http://www.mwrf.com/Articles/ArticleID/7229/7229.html
852 // * The Convolution Theorem could be used to bring the complexity of
853 //   convolution down from O(n*n) to O(n*log(n)) using the Fast Fourier
854 //   Transform, see http://en.wikipedia.org/wiki/Convolution_theorem
855 // * Simply resampling in two steps can also yield computational
856 //   savings, since the transition band will be wider in the first step
857 //   and the required filter order is thus lower in this step.
858 //   Laurent Ganier has found the optimal intermediate sampling frequency
859 //   to be (via derivation of sum of two steps):
860 //     2 * pass_freq + sqrt [ 2 * pass_freq * orig_sample_freq
861 //       * (dest_sample_freq - 2 * pass_freq) / dest_sample_freq ]
862 //
863 // NB! the result of right shifting negative numbers is really
864 // implementation dependent in the C++ standard.
865 // ----------------------------------------------------------------------------
866 RESID_INLINE
clock_resample_interpolate(cycle_count & delta_t,short * buf,int n,int interleave)867 int SIDFP::clock_resample_interpolate(cycle_count& delta_t, short* buf, int n,
868                                     int interleave)
869 {
870   int s = 0;
871 
872   for (;;) {
873     float next_sample_offset = sample_offset + cycles_per_sample;
874     /* full clocks left to next sample */
875     int delta_t_sample = (int) next_sample_offset;
876     if (delta_t_sample > delta_t || s >= n)
877       break;
878 
879     /* clock forward delta_t_sample samples */
880     for (int i = 0; i < delta_t_sample; i++) {
881       clock();
882       sample[sample_index] = sample[sample_index + RINGSIZE] = output();
883       ++ sample_index;
884       sample_index &= RINGSIZE - 1;
885     }
886     delta_t -= delta_t_sample;
887 
888     /* Phase of the sample in terms of clock, [0 .. 1[. */
889     sample_offset = next_sample_offset - (float) delta_t_sample;
890 
891     /* find the first of the nearest fir tables close to the phase */
892     float fir_offset_rmd = sample_offset * fir_RES;
893     int fir_offset = (int) fir_offset_rmd;
894     /* [0 .. 1[ */
895     fir_offset_rmd -= (float) fir_offset;
896 
897     /* find fir_N most recent samples, plus one extra in case the FIR wraps. */
898     float* sample_start = sample + sample_index - fir_N + RINGSIZE - 1;
899 
900     float v1 =
901 #if (RESID_USE_SSE==1)
902       can_use_sse ? convolve_sse(sample_start, fir + fir_offset*fir_N, fir_N) :
903 #endif
904         convolve(sample_start, fir + fir_offset*fir_N, fir_N);
905 
906     // Use next FIR table, wrap around to first FIR table using
907     // previous sample.
908     if (++ fir_offset == fir_RES) {
909       fir_offset = 0;
910       ++ sample_start;
911     }
912     float v2 =
913 #if (RESID_USE_SSE==1)
914       can_use_sse ? convolve_sse(sample_start, fir + fir_offset*fir_N, fir_N) :
915 #endif
916         convolve(sample_start, fir + fir_offset*fir_N, fir_N);
917 
918     // Linear interpolation between the sinc tables yields good approximation
919     // for the exact value.
920     int v = (int) (v1 + fir_offset_rmd * (v2 - v1));
921 
922     // Saturated arithmetics to guard against 16 bit sample overflow.
923     const int half = 1 << 15;
924     if (v >= half) {
925       v = half - 1;
926     }
927     else if (v < -half) {
928       v = -half;
929     }
930 
931     buf[s ++ * interleave] = v;
932   }
933 
934   /* clock forward delta_t samples */
935   for (int i = 0; i < delta_t; i++) {
936     clock();
937     sample[sample_index] = sample[sample_index + RINGSIZE] = output();
938     ++ sample_index;
939     sample_index &= RINGSIZE - 1;
940   }
941   sample_offset -= (float) delta_t;
942   delta_t = 0;
943   return s;
944 }
945