1 ////////////////////////////////////////////////////////////////////////////////////////
2 //
3 // Nestopia - NES/Famicom emulator written in C++
4 //
5 // Copyright (C) 2003-2008 Martin Freij
6 //
7 // This file is part of Nestopia.
8 //
9 // Nestopia is free software; you can redistribute it and/or modify
10 // it under the terms of the GNU General Public License as published by
11 // the Free Software Foundation; either version 2 of the License, or
12 // (at your option) any later version.
13 //
14 // Nestopia is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 // GNU General Public License for more details.
18 //
19 // You should have received a copy of the GNU General Public License
20 // along with Nestopia; if not, write to the Free Software
21 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 //
23 ////////////////////////////////////////////////////////////////////////////////////////
24 
25 #include <cstring>
26 #include "NstCpu.hpp"
27 #include "NstState.hpp"
28 #include "api/NstApiSound.hpp"
29 #include "NstSoundRenderer.inl"
30 
31 namespace Nes
32 {
33 	namespace Core
34 	{
35 		const dword Apu::Cycles::frameClocks[3][4] =
36 		{
37 			{
38 				CPU_RP2A03_CC * 29830UL,
39 				CPU_RP2A03_CC,
40 				CPU_RP2A03_CC,
41 				CPU_RP2A03_CC * (29830UL - 2),
42 			},
43 			{
44 				CPU_RP2A07_CC * 33254UL,
45 				CPU_RP2A07_CC,
46 				CPU_RP2A07_CC,
47 				CPU_RP2A07_CC * (33254UL - 2)
48 			},
49 			{
50 				CPU_DENDY_CC * 29830UL,
51 				CPU_DENDY_CC,
52 				CPU_DENDY_CC,
53 				CPU_DENDY_CC * (29830UL - 2),
54 			}
55 		};
56 
57 		const dword Apu::Cycles::oscillatorClocks[3][2][4] =
58 		{
59 			{
60 				{
61 					CPU_RP2A03_CC * (7459UL - 1),
62 					CPU_RP2A03_CC * 7456UL,
63 					CPU_RP2A03_CC * 7458UL,
64 					CPU_RP2A03_CC * 7458UL
65 				},
66 				{
67 					CPU_RP2A03_CC * 7458UL,
68 					CPU_RP2A03_CC * 7456UL,
69 					CPU_RP2A03_CC * 7458UL,
70 					CPU_RP2A03_CC * (7458UL + 7452)
71 				}
72 			},
73 			{
74 				{
75 					CPU_RP2A07_CC * (8315UL - 1),
76 					CPU_RP2A07_CC * 8314UL,
77 					CPU_RP2A07_CC * 8312UL,
78 					CPU_RP2A07_CC * 8314UL
79 				},
80 				{
81 					CPU_RP2A07_CC * 8314UL,
82 					CPU_RP2A07_CC * 8314UL,
83 					CPU_RP2A07_CC * 8312UL,
84 					CPU_RP2A07_CC * (8314UL + 8312)
85 				}
86 			},
87 			{
88 				{
89 					CPU_DENDY_CC * (7459UL - 1),
90 					CPU_DENDY_CC * 7456UL,
91 					CPU_DENDY_CC * 7458UL,
92 					CPU_DENDY_CC * 7458UL
93 				},
94 				{
95 					CPU_DENDY_CC * 7458UL,
96 					CPU_DENDY_CC * 7456UL,
97 					CPU_DENDY_CC * 7458UL,
98 					CPU_DENDY_CC * (7458UL + 7452)
99 				}
100 			}
101 		};
102 
103 		const byte Apu::Channel::LengthCounter::lut[32] =
104 		{
105 			0x0A, 0xFE, 0x14, 0x02,
106 			0x28, 0x04, 0x50, 0x06,
107 			0xA0, 0x08, 0x3C, 0x0A,
108 			0x0E, 0x0C, 0x1A, 0x0E,
109 			0x0C, 0x10, 0x18, 0x12,
110 			0x30, 0x14, 0x60, 0x16,
111 			0xC0, 0x18, 0x48, 0x1A,
112 			0x10, 0x1C, 0x20, 0x1E
113 		};
114 
115 		const word Apu::Noise::lut[3][16] =
116 		{
117 			{
118 				0x004, 0x008, 0x010, 0x020,
119 				0x040, 0x060, 0x080, 0x0A0,
120 				0x0CA, 0x0FE, 0x17C, 0x1FC,
121 				0x2FA, 0x3F8, 0x7F2, 0xFE4
122 			},
123 			{
124 				0x004, 0x007, 0x00E, 0x01E,
125 				0x03C, 0x058, 0x076, 0x094,
126 				0x0BC, 0x0EC, 0x162, 0x1D8,
127 				0x2C4, 0x3B0, 0x762, 0xEC2
128 			},
129 			{
130 				0x004, 0x008, 0x010, 0x020,
131 				0x040, 0x060, 0x080, 0x0A0,
132 				0x0CA, 0x0FE, 0x17C, 0x1FC,
133 				0x2FA, 0x3F8, 0x7F2, 0xFE4
134 			}
135 		};
136 
137 		const word Apu::Dmc::lut[3][16] =
138 		{
139 			{
140 				0x1AC * CPU_RP2A03_CC,
141 				0x17C * CPU_RP2A03_CC,
142 				0x154 * CPU_RP2A03_CC,
143 				0x140 * CPU_RP2A03_CC,
144 				0x11E * CPU_RP2A03_CC,
145 				0x0FE * CPU_RP2A03_CC,
146 				0x0E2 * CPU_RP2A03_CC,
147 				0x0D6 * CPU_RP2A03_CC,
148 				0x0BE * CPU_RP2A03_CC,
149 				0x0A0 * CPU_RP2A03_CC,
150 				0x08E * CPU_RP2A03_CC,
151 				0x080 * CPU_RP2A03_CC,
152 				0x06A * CPU_RP2A03_CC,
153 				0x054 * CPU_RP2A03_CC,
154 				0x048 * CPU_RP2A03_CC,
155 				0x036 * CPU_RP2A03_CC
156 			},
157 			{
158 				0x18E * CPU_RP2A07_CC,
159 				0x162 * CPU_RP2A07_CC,
160 				0x13C * CPU_RP2A07_CC,
161 				0x12A * CPU_RP2A07_CC,
162 				0x114 * CPU_RP2A07_CC,
163 				0x0EC * CPU_RP2A07_CC,
164 				0x0D2 * CPU_RP2A07_CC,
165 				0x0C6 * CPU_RP2A07_CC,
166 				0x0B0 * CPU_RP2A07_CC,
167 				0x094 * CPU_RP2A07_CC,
168 				0x084 * CPU_RP2A07_CC,
169 				0x076 * CPU_RP2A07_CC,
170 				0x062 * CPU_RP2A07_CC,
171 				0x04E * CPU_RP2A07_CC,
172 				0x042 * CPU_RP2A07_CC,
173 				0x032 * CPU_RP2A07_CC
174 			},
175 			{
176 				0x1AC * CPU_DENDY_CC,
177 				0x17C * CPU_DENDY_CC,
178 				0x154 * CPU_DENDY_CC,
179 				0x140 * CPU_DENDY_CC,
180 				0x11E * CPU_DENDY_CC,
181 				0x0FE * CPU_DENDY_CC,
182 				0x0E2 * CPU_DENDY_CC,
183 				0x0D6 * CPU_DENDY_CC,
184 				0x0BE * CPU_DENDY_CC,
185 				0x0A0 * CPU_DENDY_CC,
186 				0x08E * CPU_DENDY_CC,
187 				0x080 * CPU_DENDY_CC,
188 				0x06A * CPU_DENDY_CC,
189 				0x054 * CPU_DENDY_CC,
190 				0x048 * CPU_DENDY_CC,
191 				0x036 * CPU_DENDY_CC
192 			}
193 		};
194 
195 		#ifdef NST_MSVC_OPTIMIZE
196 		#pragma optimize("s", on)
197 		#endif
198 
Apu(Cpu & c)199 		Apu::Apu(Cpu& c)
200 		:
201 		cpu        (c),
202 		extChannel (NULL),
203 		buffer     (16)
204 		{
205 			NST_COMPILE_ASSERT( CPU_RP2A03 == 0 && CPU_RP2A07 == 1 && CPU_DENDY == 2 );
206 
207 			PowerOff();
208 		}
209 
PowerOff()210 		void Apu::PowerOff()
211 		{
212 			Reset( false, true );
213 		}
214 
Reset(bool hard)215 		void Apu::Reset(bool hard)
216 		{
217 			Reset( true, hard );
218 		}
219 
Reset(const bool on,const bool hard)220 		void Apu::Reset(const bool on,const bool hard)
221 		{
222 			if (on)
223 				UpdateSettings();
224 
225 			updater = &Apu::SyncOff;
226 
227 			cycles.Reset( extChannel, cpu.GetModel() );
228 			synchronizer.Resync( settings.speed, cpu );
229 
230 			for (uint i=0; i < 2; ++i)
231 				square[i].Reset();
232 
233 			triangle.Reset();
234 			noise.Reset( cpu.GetModel() );
235 			dmc.Reset( cpu.GetModel() );
236 
237 			dcBlocker.Reset();
238 
239 			stream = NULL;
240 
241 			buffer.Reset( settings.bits );
242 
243 			if (on)
244 			{
245 				cpu.Map( 0x4000 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4000 );
246 				cpu.Map( 0x4001 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4001 );
247 				cpu.Map( 0x4002 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4002 );
248 				cpu.Map( 0x4003 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4003 );
249 				cpu.Map( 0x4004 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4000 );
250 				cpu.Map( 0x4005 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4001 );
251 				cpu.Map( 0x4006 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4002 );
252 				cpu.Map( 0x4007 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4003 );
253 				cpu.Map( 0x4008 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4008 );
254 				cpu.Map( 0x400A ).Set( this, &Apu::Peek_40xx, &Apu::Poke_400A );
255 				cpu.Map( 0x400B ).Set( this, &Apu::Peek_40xx, &Apu::Poke_400B );
256 				cpu.Map( 0x400C ).Set( this, &Apu::Peek_40xx, &Apu::Poke_400C );
257 				cpu.Map( 0x400E ).Set( this, &Apu::Peek_40xx, &Apu::Poke_400E );
258 				cpu.Map( 0x400F ).Set( this, &Apu::Peek_40xx, &Apu::Poke_400F );
259 				cpu.Map( 0x4010 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4010 );
260 				cpu.Map( 0x4011 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4011 );
261 				cpu.Map( 0x4012 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4012 );
262 				cpu.Map( 0x4013 ).Set( this, &Apu::Peek_40xx, &Apu::Poke_4013 );
263 				cpu.Map( 0x4015 ).Set( this, &Apu::Peek_4015, &Apu::Poke_4015 );
264 
265 				if (cpu.GetApu().IsGenie())
266 				{
267 					NES_DO_POKE(4000,0x4000,0x30);
268 					NES_DO_POKE(4001,0x4001,0xF9);
269 					NES_DO_POKE(400C,0x400C,0x30);
270 					NES_DO_POKE(400E,0x400E,0x0E);
271 					NES_DO_POKE(400F,0x400F,0x04);
272 					NES_DO_POKE(4015,0x4015,0x09);
273 				}
274 
275 				if (hard)
276 				{
277 					ctrl = STATUS_FRAME_IRQ_ENABLE;
278 				}
279 
280 				if (ctrl == STATUS_FRAME_IRQ_ENABLE)
281 					cycles.frameIrqClock = (cycles.frameCounter / cycles.fixed) - cpu.GetClock();
282 
283 				if (extChannel)
284 					extChannel->Reset();
285 			}
286 			else
287 			{
288 				ctrl = STATUS_FRAME_IRQ_ENABLE;
289 			}
290 		}
291 
SetSampleRate(const dword rate)292 		Result Apu::SetSampleRate(const dword rate)
293 		{
294 			if (settings.rate == rate)
295 				return RESULT_NOP;
296 
297 			if (!rate)
298 				return RESULT_ERR_INVALID_PARAM;
299 
300 			if (rate < 11025 || rate > 96000)
301 				return RESULT_ERR_UNSUPPORTED;
302 
303 			settings.rate = rate;
304 			UpdateSettings();
305 
306 			return RESULT_OK;
307 		}
308 
SetSampleBits(const uint bits)309 		Result Apu::SetSampleBits(const uint bits)
310 		{
311 			if (settings.bits == bits)
312 				return RESULT_NOP;
313 
314 			if (!bits)
315 				return RESULT_ERR_INVALID_PARAM;
316 
317 			if (bits != 8 && bits != 16)
318 				return RESULT_ERR_UNSUPPORTED;
319 
320 			settings.bits = bits;
321 			UpdateSettings();
322 
323 			return RESULT_OK;
324 		}
325 
SetVolume(const uint channels,const uint volume)326 		Result Apu::SetVolume(const uint channels,const uint volume)
327 		{
328 			if (volume > 100)
329 				return RESULT_ERR_INVALID_PARAM;
330 
331 			bool updated = false;
332 
333 			for (uint i=0; i < MAX_CHANNELS; ++i)
334 			{
335 				if (channels & (1U << i))
336 				{
337 					if (settings.volumes[i] != volume)
338 					{
339 						settings.volumes[i] = volume;
340 						updated = true;
341 					}
342 				}
343 			}
344 
345 			if (!updated)
346 				return RESULT_NOP;
347 
348 			UpdateSettings();
349 
350 			return RESULT_OK;
351 		}
352 
GetVolume(const uint channel) const353 		uint Apu::GetVolume(const uint channel) const
354 		{
355 			for (uint i=0; i < MAX_CHANNELS; ++i)
356 			{
357 				if (channel & (1U << i))
358 					return settings.volumes[i];
359 			}
360 
361 			return 0;
362 		}
363 
GetCtrl()364 		uint Apu::GetCtrl()
365 		{
366 			return ctrl;
367 		}
368 
SetSpeed(const uint speed)369 		Result Apu::SetSpeed(const uint speed)
370 		{
371 			if (settings.speed == speed)
372 				return RESULT_NOP;
373 
374 			if ((speed > 0 && speed < 30) || speed > 240)
375 				return RESULT_ERR_UNSUPPORTED;
376 
377 			settings.speed = speed;
378 			UpdateSettings();
379 
380 			return RESULT_OK;
381 		}
382 
Mute(const bool mute)383 		void Apu::Mute(const bool mute)
384 		{
385 			if (settings.muted != mute)
386 			{
387 				settings.muted = mute;
388 				UpdateSettings();
389 			}
390 		}
391 
SetAutoTranspose(const bool transpose)392 		void Apu::SetAutoTranspose(const bool transpose)
393 		{
394 			if (settings.transpose != transpose)
395 			{
396 				settings.transpose = transpose;
397 				UpdateSettings();
398 			}
399 		}
400 
SetGenie(const bool genie)401 		void Apu::SetGenie(const bool genie)
402 		{
403 			if (settings.genie != genie)
404 			{
405 				settings.genie = genie;
406 				UpdateSettings();
407 			}
408 		}
409 
EnableStereo(const bool enable)410 		void Apu::EnableStereo(const bool enable)
411 		{
412 			if (settings.stereo != enable)
413 			{
414 				settings.stereo = enable;
415 				UpdateSettings();
416 			}
417 		}
418 
UpdateSettings()419 		void Apu::UpdateSettings()
420 		{
421 			cycles.Update( settings.rate, settings.speed, cpu );
422 			synchronizer.Reset( settings.speed, settings.rate, cpu );
423 			dcBlocker.Reset();
424 			buffer.Reset( settings.bits );
425 
426 			Cycle rate; uint fixed;
427 			CalculateOscillatorClock( rate, fixed );
428 
429 			square[0].UpdateSettings ( settings.muted ? 0 : settings.volumes[ Channel::APU_SQUARE1  ], rate, fixed );
430 			square[1].UpdateSettings ( settings.muted ? 0 : settings.volumes[ Channel::APU_SQUARE2  ], rate, fixed );
431 			triangle.UpdateSettings  ( settings.muted ? 0 : settings.volumes[ Channel::APU_TRIANGLE ], rate, fixed );
432 			noise.UpdateSettings     ( settings.muted ? 0 : settings.volumes[ Channel::APU_NOISE    ], rate, fixed );
433 			dmc.UpdateSettings       ( settings.muted ? 0 : settings.volumes[ Channel::APU_DPCM     ] );
434 
435 			UpdateVolumes();
436 		}
437 
UpdateVolumes()438 		void Apu::UpdateVolumes()
439 		{
440 			settings.audible = (extChannel && extChannel->UpdateSettings()) ||
441 			(
442 				uint(settings.volumes[ Channel::APU_SQUARE1  ]) |
443 				uint(settings.volumes[ Channel::APU_SQUARE2  ]) |
444 				uint(settings.volumes[ Channel::APU_TRIANGLE ]) |
445 				uint(settings.volumes[ Channel::APU_NOISE    ]) |
446 				uint(settings.volumes[ Channel::APU_DPCM     ])
447 			);
448 		}
449 
Resync(const dword rate)450 		void Apu::Resync(const dword rate)
451 		{
452 			cycles.Update( rate, settings.speed, cpu );
453 			ClearBuffers( false );
454 		}
455 
CalculateOscillatorClock(Cycle & rate,uint & fixed) const456 		void Apu::CalculateOscillatorClock(Cycle& rate,uint& fixed) const
457 		{
458 			dword sampleRate = settings.rate;
459 
460 			if (settings.transpose && settings.speed)
461 				sampleRate = sampleRate * cpu.GetFps() / settings.speed;
462 
463 			uint multiplier = 0;
464 			const qaword clockBase = cpu.GetClockBase();
465 
466 			while (++multiplier < 0x1000 && clockBase * (multiplier+1) / sampleRate <= 0x7FFFF && clockBase * multiplier % sampleRate);
467 
468 			rate = clockBase * multiplier / sampleRate;
469 			fixed = cpu.GetClockDivider() * cpu.GetClock() * multiplier;
470 		}
471 
SaveState(State::Saver & state,const dword baseChunk) const472 		void Apu::SaveState(State::Saver& state,const dword baseChunk) const
473 		{
474 			state.Begin( baseChunk );
475 
476 			{
477 				Cycle clock = cycles.frameCounter / cycles.fixed;
478 
479 				NST_VERIFY( clock >= cpu.GetCycles() );
480 
481 				if (clock > cpu.GetCycles())
482 					clock = (clock - cpu.GetCycles()) / cpu.GetClock();
483 				else
484 					clock = 0;
485 
486 				NST_VERIFY( cycles.frameCounter == (cpu.GetCycles() + clock * cpu.GetClock()) * cycles.fixed );
487 
488 				const byte data[4] =
489 				{
490 					ctrl,
491 					clock & 0xFF,
492 					clock >> 8,
493 					cycles.frameDivider
494 				};
495 
496 				state.Begin( AsciiId<'F','R','M'>::V ).Write( data ).End();
497 			}
498 
499 			if (cycles.frameIrqClock != Cpu::CYCLE_MAX)
500 			{
501 				Cycle clock = cycles.frameIrqClock;
502 
503 				NST_VERIFY( clock >= cpu.GetCycles() );
504 
505 				if (clock > cpu.GetCycles())
506 					clock = (clock - cpu.GetCycles()) / cpu.GetClock();
507 				else
508 					clock = 0;
509 
510 				NST_VERIFY( cycles.frameIrqClock == cpu.GetCycles() + clock * cpu.GetClock() );
511 
512 				const byte data[3] =
513 				{
514 					clock & 0xFF,
515 					clock >> 8,
516 					cycles.frameIrqRepeat % 3
517 				};
518 
519 				state.Begin( AsciiId<'I','R','Q'>::V ).Write( data ).End();
520 			}
521 
522 			if (cycles.extCounter != Cpu::CYCLE_MAX)
523 			{
524 				Cycle clock = cycles.extCounter / cycles.fixed;
525 
526 				NST_VERIFY( clock >= cpu.GetCycles() || clock == 0 );
527 
528 				if (clock > cpu.GetCycles())
529 				{
530 					clock = (clock - cpu.GetCycles()) / cpu.GetClock();
531 					NST_VERIFY( cycles.extCounter == (cpu.GetCycles() + clock * cpu.GetClock()) * cycles.fixed );
532 				}
533 				else
534 				{
535 					clock = 0;
536 				}
537 
538 				state.Begin( AsciiId<'E','X','T'>::V ).Write16( clock ).End();
539 			}
540 
541 			square[0].SaveState( state, AsciiId<'S','Q','0'>::V );
542 			square[1].SaveState( state, AsciiId<'S','Q','1'>::V );
543 			triangle.SaveState( state, AsciiId<'T','R','I'>::V );
544 			noise.SaveState( state, AsciiId<'N','O','I'>::V );
545 			dmc.SaveState( state, AsciiId<'D','M','C'>::V, cpu, cycles.dmcClock );
546 
547 			dcBlocker.SaveState( state, AsciiId<'D','C','B'>::V );
548 
549 			{
550 				const byte data[4] =
551 				{
552 					cycles.rateCounter & 0xFFU,
553 					cycles.rateCounter >> 8,
554 					cycles.rateCounter >> 16,
555 					cycles.rateCounter >> 24,
556 				};
557 
558 				state.Begin( AsciiId<'S','0','0'>::V ).Write( data ).End();
559 			}
560 
561 			state.End();
562 		}
563 
LoadState(State::Loader & state)564 		void Apu::LoadState(State::Loader& state)
565 		{
566 			cycles.frameIrqClock = Cpu::CYCLE_MAX;
567 			cycles.frameIrqRepeat = 0;
568 
569 			while (const dword chunk = state.Begin())
570 			{
571 				switch (chunk)
572 				{
573 					case AsciiId<'F','R','M'>::V:
574 					{
575 						State::Loader::Data<4> data( state );
576 
577 						ctrl = data[0] & STATUS_BITS;
578 
579 						cycles.rateCounter = cycles.fixed * cpu.GetCycles();
580 
581 						cycles.frameCounter = cycles.fixed *
582 						(
583 							cpu.GetCycles() + (data[1] | data[2] << 8) * cpu.GetClock()
584 						);
585 
586 						cycles.frameDivider = data[3] & 0x3;
587 						break;
588 					}
589 
590 					case AsciiId<'I','R','Q'>::V:
591 					{
592 						State::Loader::Data<3> data( state );
593 
594 						cycles.frameIrqClock = cpu.GetCycles() +
595 						(
596 							(data[0] | data[1] << 8) * cpu.GetClock()
597 						);
598 
599 						cycles.frameIrqRepeat = (data[2] & 0x3) % 3;
600 						break;
601 					}
602 
603 					case AsciiId<'E','X','T'>::V:
604 
605 						NST_VERIFY( cycles.extCounter != Cpu::CYCLE_MAX );
606 
607 						if (cycles.extCounter != Cpu::CYCLE_MAX)
608 						{
609 							cycles.extCounter = cycles.fixed *
610 							(
611 								cpu.GetCycles() + state.Read16() * cpu.GetClock()
612 							);
613 						}
614 						break;
615 
616 					case AsciiId<'S','Q','0'>::V:
617 
618 						square[0].LoadState( state );
619 						break;
620 
621 					case AsciiId<'S','Q','1'>::V:
622 
623 						square[1].LoadState( state );
624 						break;
625 
626 					case AsciiId<'T','R','I'>::V:
627 
628 						triangle.LoadState( state );
629 						break;
630 
631 					case AsciiId<'N','O','I'>::V:
632 
633 						noise.LoadState( state, cpu.GetModel() );
634 						break;
635 
636 					case AsciiId<'D','M','C'>::V:
637 
638 						dmc.LoadState( state, cpu, cpu.GetModel(), cycles.dmcClock );
639 						break;
640 
641 					case AsciiId<'D','C','B'>::V:
642 
643 						dcBlocker.LoadState( state );
644 						break;
645 
646 					case AsciiId<'S','0','0'>::V:
647 					{
648 						State::Loader::Data<4> data( state );
649 
650 						cycles.rateCounter = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
651 						break;
652 					}
653 				}
654 
655 				state.End();
656 			}
657 
658 			if (ctrl != STATUS_FRAME_IRQ_ENABLE)
659 			{
660 				cycles.frameIrqClock = Cpu::CYCLE_MAX;
661 				cycles.frameIrqRepeat = 0;
662 			}
663 			else if (cycles.frameIrqClock == Cpu::CYCLE_MAX)
664 			{
665 				cycles.frameIrqClock = (cycles.frameCounter / cycles.fixed) + (3 - cycles.frameDivider) * (Cycles::frameClocks[cpu.GetModel()][0] / 4);
666 				cycles.frameIrqRepeat = 0;
667 			}
668 		}
669 
670 		#ifdef NST_MSVC_OPTIMIZE
671 		#pragma optimize("", on)
672 		#endif
673 
SyncOn(const Cycle target)674 		void NST_FASTCALL Apu::SyncOn(const Cycle target)
675 		{
676 			NST_ASSERT( (stream && settings.audible) && (cycles.rate && cycles.fixed) && (cycles.extCounter == Cpu::CYCLE_MAX) );
677 
678 			if (cycles.rateCounter < target)
679 			{
680 				Cycle rateCounter = cycles.rateCounter;
681 				const Cycle rate = cycles.rate;
682 
683 				do
684 				{
685 					buffer << GetSample();
686 
687 					if (cycles.frameCounter <= rateCounter)
688 						ClockFrameCounter();
689 
690 					rateCounter += rate;
691 				}
692 				while (rateCounter < target);
693 
694 				cycles.rateCounter = rateCounter;
695 			}
696 
697 			if (cycles.frameCounter < target)
698 			{
699 				ClockFrameCounter();
700 				NST_ASSERT( cycles.frameCounter >= target );
701 			}
702 		}
703 
SyncOnExt(const Cycle target)704 		void NST_FASTCALL Apu::SyncOnExt(const Cycle target)
705 		{
706 			NST_ASSERT( (stream && settings.audible) && (cycles.rate && cycles.fixed) && extChannel );
707 
708 			Cycle extCounter = cycles.extCounter;
709 
710 			if (cycles.rateCounter < target)
711 			{
712 				Cycle rateCounter = cycles.rateCounter;
713 
714 				do
715 				{
716 					buffer << GetSample();
717 
718 					if (extCounter <= rateCounter)
719 						extCounter = extChannel->Clock( extCounter, cycles.fixed, rateCounter );
720 
721 					if (cycles.frameCounter <= rateCounter)
722 						ClockFrameCounter();
723 
724 					rateCounter += cycles.rate;
725 				}
726 				while (rateCounter < target);
727 
728 				cycles.rateCounter = rateCounter;
729 			}
730 
731 			if (extCounter <= target)
732 			{
733 				cycles.extCounter = extChannel->Clock( extCounter, cycles.fixed, target );
734 				NST_ASSERT( cycles.extCounter > target );
735 			}
736 			else
737 			{
738 				cycles.extCounter = extCounter;
739 			}
740 
741 			if (cycles.frameCounter < target)
742 			{
743 				ClockFrameCounter();
744 				NST_ASSERT( cycles.frameCounter >= target );
745 			}
746 		}
747 
SyncOff(const Cycle target)748 		void NST_FASTCALL Apu::SyncOff(const Cycle target)
749 		{
750 			NST_ASSERT( !(stream && settings.audible) && cycles.fixed );
751 
752 			cycles.rateCounter = target;
753 
754 			while (cycles.frameCounter < target)
755 				ClockFrameCounter();
756 
757 			NST_ASSERT( cycles.extCounter == Cpu::CYCLE_MAX || extChannel );
758 
759 			if (cycles.extCounter <= target)
760 			{
761 				cycles.extCounter = extChannel->Clock( cycles.extCounter, cycles.fixed, target );
762 				NST_ASSERT( cycles.extCounter > target );
763 			}
764 		}
765 
BeginFrame(Sound::Output * output)766 		void Apu::BeginFrame(Sound::Output* output)
767 		{
768 			stream = output;
769 			updater = (output && settings.audible ? (cycles.extCounter == Cpu::CYCLE_MAX ? &Apu::SyncOn : &Apu::SyncOnExt) : &Apu::SyncOff);
770 		}
771 
Update(const Cycle target)772 		inline void Apu::Update(const Cycle target)
773 		{
774 			NST_ASSERT( cycles.fixed );
775 			(*this.*updater)( target * cycles.fixed );
776 		}
777 
Update()778 		void Apu::Update()
779 		{
780 			Update( cpu.Update() );
781 		}
782 
UpdateLatency()783 		void Apu::UpdateLatency()
784 		{
785 			Update( cpu.Update() + 1 );
786 		}
787 
UpdateDelta()788 		bool Apu::UpdateDelta()
789 		{
790 			const Cycle elapsed = cpu.Update();
791 			const bool delta = cycles.frameCounter != elapsed * cycles.fixed;
792 			Update( elapsed + 1 );
793 			return delta;
794 		}
795 
796 		template<typename T,bool STEREO>
FlushSound()797 		void Apu::FlushSound()
798 		{
799 			NST_ASSERT( (stream && settings.audible) && (cycles.rate && cycles.fixed) );
800 
801 			for (uint i=0; i < 2; ++i)
802 			{
803 				if (stream->length[i] && stream->samples[i])
804 				{
805 					Sound::Buffer::Block block( stream->length[i] );
806 					buffer >> block;
807 
808 					Sound::Buffer::Renderer<T,STEREO> output( stream->samples[i], stream->length[i], buffer.history );
809 
810 					if (output << block)
811 					{
812 						const Cycle target = cpu.GetCycles() * cycles.fixed;
813 
814 						if (cycles.rateCounter < target)
815 						{
816 							Cycle rateCounter = cycles.rateCounter;
817 
818 							do
819 							{
820 								output << GetSample();
821 
822 								if (cycles.frameCounter <= rateCounter)
823 									ClockFrameCounter();
824 
825 								if (cycles.extCounter <= rateCounter)
826 									cycles.extCounter = extChannel->Clock( cycles.extCounter, cycles.fixed, rateCounter );
827 
828 								rateCounter += cycles.rate;
829 							}
830 							while (rateCounter < target && output);
831 
832 							cycles.rateCounter = rateCounter;
833 						}
834 
835 						if (output)
836 						{
837 							if (cycles.frameCounter < target)
838 								ClockFrameCounter();
839 
840 							if (cycles.extCounter <= target)
841 								cycles.extCounter = extChannel->Clock( cycles.extCounter, cycles.fixed, target );
842 
843 							do
844 							{
845 								output << GetSample();
846 							}
847 							while (output);
848 						}
849 					}
850 				}
851 			}
852 		}
853 
EndFrame()854 		void Apu::EndFrame()
855 		{
856 			NST_ASSERT( (stream && settings.audible) == (updater != &Apu::SyncOff) );
857 
858 			if (updater != &Apu::SyncOff)
859 			{
860 				dword streamed = 0;
861 
862 				if (Sound::Output::lockCallback( *stream ))
863 				{
864 					streamed = stream->length[0] + stream->length[1];
865 
866 					if (settings.bits == 16)
867 					{
868 						if (!settings.stereo)
869 							FlushSound<iword,false>();
870 						else
871 							FlushSound<iword,true>();
872 					}
873 					else
874 					{
875 						if (!settings.stereo)
876 							FlushSound<byte,false>();
877 						else
878 							FlushSound<byte,true>();
879 					}
880 
881 					Sound::Output::unlockCallback( *stream );
882 				}
883 
884 				if (const dword rate = synchronizer.Clock( streamed, settings.rate, cpu ))
885 					Resync( rate );
886 			}
887 
888 			Update( cpu.GetCycles() );
889 
890 			Cycle frame = cpu.GetFrameCycles();
891 
892 			NST_ASSERT
893 			(
894 				cycles.dmcClock >= frame &&
895 				cycles.frameIrqClock >= frame
896 			);
897 
898 			cycles.dmcClock -= frame;
899 
900 			if (cycles.frameIrqClock != Cpu::CYCLE_MAX)
901 				cycles.frameIrqClock -= frame;
902 
903 			frame *= cycles.fixed;
904 
905 			NST_ASSERT
906 			(
907 				cycles.rateCounter >= frame &&
908 				cycles.frameCounter >= frame &&
909 				cycles.extCounter >= frame
910 			);
911 
912 			cycles.rateCounter -= frame;
913 			cycles.frameCounter -= frame;
914 
915 			if (cycles.extCounter != Cpu::CYCLE_MAX)
916 				cycles.extCounter -= frame;
917 		}
918 
919 		#ifdef NST_MSVC_OPTIMIZE
920 		#pragma optimize("s", on)
921 		#endif
922 
Settings()923 		Apu::Settings::Settings()
924 		: rate(44100), bits(16), speed(0), muted(false), transpose(false), stereo(false), audible(true)
925 		{
926 			for (uint i=0; i < MAX_CHANNELS; ++i)
927 				volumes[i] = Channel::DEFAULT_VOLUME;
928 		}
929 
Cycles()930 		Apu::Cycles::Cycles()
931 		: fixed(1), rate(1) {}
932 
Reset(const bool extChannel,const CpuModel model)933 		void Apu::Cycles::Reset(const bool extChannel,const CpuModel model)
934 		{
935 			rateCounter = 0;
936 			frameDivider = 0;
937 			frameIrqClock = Cpu::CYCLE_MAX;
938 			frameIrqRepeat = 0;
939 			dmcClock = Dmc::GetResetFrequency( model );
940 			frameCounter = frameClocks[model][0] * fixed;
941 			extCounter = (extChannel ? 0UL : Cpu::CYCLE_MAX);
942 		}
943 
Update(dword sampleRate,const uint speed,const Cpu & cpu)944 		void Apu::Cycles::Update(dword sampleRate,const uint speed,const Cpu& cpu)
945 		{
946 			frameCounter /= fixed;
947 			rateCounter /= fixed;
948 
949 			if (extCounter != Cpu::CYCLE_MAX)
950 				extCounter /= fixed;
951 
952 			if (speed)
953 				sampleRate = sampleRate * cpu.GetFps() / speed;
954 
955 			uint multiplier = 0;
956 			const qaword clockBase = cpu.GetClockBase();
957 
958 			while (++multiplier < 512 && clockBase * multiplier % sampleRate);
959 
960 			rate = clockBase * multiplier / sampleRate;
961 			fixed = cpu.GetClockDivider() * multiplier;
962 
963 			frameCounter *= fixed;
964 			rateCounter *= fixed;
965 
966 			if (extCounter != Cpu::CYCLE_MAX)
967 				extCounter *= fixed;
968 		}
969 
Synchronizer()970 		Apu::Synchronizer::Synchronizer()
971 		: rate(0) {}
972 
Resync(uint speed,const Cpu & cpu)973 		void Apu::Synchronizer::Resync(uint speed,const Cpu& cpu)
974 		{
975 			duty = 0;
976 			streamed = 0;
977 
978 			if (speed == 0 || speed == cpu.GetFps())
979 				sync = 4;
980 			else
981 				sync = 0;
982 		}
983 
Reset(uint speed,dword sampleRate,const Cpu & cpu)984 		void Apu::Synchronizer::Reset(uint speed,dword sampleRate,const Cpu& cpu)
985 		{
986 			rate = sampleRate;
987 			Resync( speed, cpu );
988 		}
989 
990 		#ifdef NST_MSVC_OPTIMIZE
991 		#pragma optimize("", on)
992 		#endif
993 
Clock(const dword output,const dword sampleRate,const Cpu & cpu)994 		NST_SINGLE_CALL dword Apu::Synchronizer::Clock(const dword output,const dword sampleRate,const Cpu& cpu)
995 		{
996 			/*if (sync)
997 			{
998 				if (duty >= 60*4)
999 					streamed += output;
1000 
1001 				if (duty < 60*12)
1002 				{
1003 					duty++;
1004 				}
1005 				else
1006 				{
1007 					duty = 60*4;
1008 
1009 					dword actualRate = streamed / (60*8) * cpu.GetFps();
1010 					const dword limit = sampleRate / 21;
1011 
1012 					if (actualRate <= sampleRate-limit)
1013 					{
1014 						actualRate = sampleRate-limit;
1015 						sync--;
1016 					}
1017 					else if (actualRate >= sampleRate+limit)
1018 					{
1019 						actualRate = sampleRate+limit;
1020 						sync--;
1021 					}
1022 					else
1023 					{
1024 						sync = (sync > 2 ? sync - 2 : 0);
1025 					}
1026 
1027 					actualRate = actualRate * 9999 / 10000;
1028 					streamed = 0;
1029 
1030 					if (rate != actualRate)
1031 					{
1032 						rate = actualRate;
1033 						return actualRate;
1034 					}
1035 				}
1036 			}*/
1037 
1038 			return 0;
1039 		}
1040 
1041 		#ifdef NST_MSVC_OPTIMIZE
1042 		#pragma optimize("s", on)
1043 		#endif
1044 
LengthCounter()1045 		Apu::Channel::LengthCounter::LengthCounter()
1046 		{
1047 			Reset();
1048 		}
1049 
Reset()1050 		void Apu::Channel::LengthCounter::Reset()
1051 		{
1052 			enabled = 0;
1053 			count = 0;
1054 		}
1055 
LoadState(State::Loader & state)1056 		void Apu::Channel::LengthCounter::LoadState(State::Loader& state)
1057 		{
1058 			const uint data = state.Read8();
1059 			enabled = (data == 0xFF ? 0U : ~0U);
1060 			count = data & enabled;
1061 		}
1062 
SaveState(State::Saver & state,const dword chunk) const1063 		void Apu::Channel::LengthCounter::SaveState(State::Saver& state,const dword chunk) const
1064 		{
1065 			NST_VERIFY( count < 0xFF );
1066 			state.Begin( chunk ).Write8( enabled ? count : 0xFF ).End();
1067 		}
1068 
Envelope()1069 		Apu::Channel::Envelope::Envelope()
1070 		: outputVolume(OUTPUT_MUL)
1071 		{
1072 			Reset();
1073 		}
1074 
Reset()1075 		void Apu::Channel::Envelope::Reset()
1076 		{
1077 			output = 0;
1078 			regs[0] = 0x0;
1079 			regs[1] = 0x10;
1080 			count = 0;
1081 			reset = false;
1082 		}
1083 
SetOutputVolume(uint v)1084 		void Apu::Channel::Envelope::SetOutputVolume(uint v)
1085 		{
1086 			outputVolume = v;
1087 			UpdateOutput();
1088 		}
1089 
SaveState(State::Saver & state,const dword chunk) const1090 		void Apu::Channel::Envelope::SaveState(State::Saver& state,const dword chunk) const
1091 		{
1092 			const byte data[3] =
1093 			{
1094 				count,
1095 				regs[0] | (reset ? 0x80U : 0x00U),
1096 				regs[1]
1097 			};
1098 
1099 			state.Begin( chunk ).Write( data ).End();
1100 		}
1101 
LoadState(State::Loader & state)1102 		void Apu::Channel::Envelope::LoadState(State::Loader& state)
1103 		{
1104 			State::Loader::Data<3> data( state );
1105 
1106 			count   = data[0] & 0x0F;
1107 			reset   = data[1] >> 7;
1108 			regs[0] = data[1] & 0x0F;
1109 			regs[1] = data[2];
1110 
1111 			UpdateOutput();
1112 		}
1113 
1114 		#ifdef NST_MSVC_OPTIMIZE
1115 		#pragma optimize("", on)
1116 		#endif
1117 
UpdateOutput()1118 		void Apu::Channel::Envelope::UpdateOutput()
1119 		{
1120 			output = (regs[regs[1] >> 4 & 1U] & 0xFUL) * outputVolume;
1121 		}
1122 
Clock()1123 		void Apu::Channel::Envelope::Clock()
1124 		{
1125 			if (!reset)
1126 			{
1127 				if (count)
1128 				{
1129 					count--;
1130 					return;
1131 				}
1132 
1133 				if (regs[0] | (regs[1] & 0x20U))
1134 					regs[0] = (regs[0] - 1U) & 0xF;
1135 			}
1136 			else
1137 			{
1138 				reset = false;
1139 				regs[0] = 0xF;
1140 			}
1141 
1142 			count = regs[1] & 0x0FU;
1143 			UpdateOutput();
1144 		}
1145 
Write(const uint data)1146 		void Apu::Channel::Envelope::Write(const uint data)
1147 		{
1148 			regs[1] = data;
1149 			UpdateOutput();
1150 		}
1151 
1152 		#ifdef NST_MSVC_OPTIMIZE
1153 		#pragma optimize("s", on)
1154 		#endif
1155 
DcBlocker()1156 		Apu::Channel::DcBlocker::DcBlocker()
1157 		{
1158 			Reset();
1159 		}
1160 
Reset()1161 		void Apu::Channel::DcBlocker::Reset()
1162 		{
1163 			acc = 0;
1164 			prev = 0;
1165 			next = 0;
1166 		}
1167 
1168 		#ifdef NST_MSVC_OPTIMIZE
1169 		#pragma optimize("", on)
1170 		#endif
1171 
// Feeds one sample through the filter and returns the filtered value.
// Statement order matters: each line consumes state left by the previous one.
Apu::Channel::Sample Apu::Channel::DcBlocker::Apply(Sample sample)
{
	acc  -= prev;                   // remove the previously stored (scaled) input
	prev  = signed_shl(sample,15);  // store the new input; presumably sample << 15, sign-safe
	acc  += prev - next * POLE;     // add the new input, subtract POLE times the last output
	next  = signed_shr(acc,15);     // presumably acc >> 15, sign-safe; becomes the new output
	return next;
}
1180 
SaveState(State::Saver & state,const dword chunk) const1181 		void Apu::Channel::DcBlocker::SaveState(State::Saver& state,const dword chunk) const
1182 		{
1183 			state.Begin( chunk );
1184 
1185 			{
1186 				const byte data[12] =
1187 				{
1188 					acc & 0xFFU,
1189 					acc >> 8,
1190 					acc >> 16,
1191 					acc >> 24,
1192 					prev & 0xFFU,
1193 					prev >> 8,
1194 					prev >> 16,
1195 					prev >> 24,
1196 					next & 0xFFU,
1197 					next >> 8,
1198 					next >> 16,
1199 					next >> 24,
1200 				};
1201 
1202 				state.Begin( AsciiId<'S','0','0'>::V ).Write( data ).End();
1203 			}
1204 
1205 			state.End();
1206 		}
1207 
LoadState(State::Loader & state)1208 		void Apu::Channel::DcBlocker::LoadState(State::Loader& state)
1209 		{
1210 			while (const dword chunk = state.Begin())
1211 			{
1212 				switch (chunk)
1213 				{
1214 					case AsciiId<'S','0','0'>::V:
1215 					{
1216 						State::Loader::Data<12> data( state );
1217 
1218 						acc = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
1219 						prev = data[4] | (data[5] << 8) | (data[6] << 16) | (data[7] << 24);
1220 						next = data[8] | (data[9] << 8) | (data[10] << 16) | (data[11] << 24);
1221 						break;
1222 					}
1223 				}
1224 
1225 				state.End();
1226 			}
1227 		}
1228 
1229 		#ifdef NST_MSVC_OPTIMIZE
1230 		#pragma optimize("s", on)
1231 		#endif
1232 
Channel(Apu & a)1233 		Apu::Channel::Channel(Apu& a)
1234 		: apu(a) {}
1235 
~Channel()1236 		Apu::Channel::~Channel()
1237 		{
1238 			if (apu.extChannel == this)
1239 			{
1240 				apu.extChannel = NULL;
1241 				apu.UpdateVolumes();
1242 			}
1243 		}
1244 
Connect(bool audible)1245 		void Apu::Channel::Connect(bool audible)
1246 		{
1247 			NST_ASSERT( apu.extChannel == NULL );
1248 
1249 			if (audible)
1250 				apu.settings.audible = true;
1251 			else
1252 				apu.UpdateVolumes();
1253 
1254 			apu.extChannel = this;
1255 		}
1256 
GetOscillatorClock(Cycle & rate,uint & fixed) const1257 		void Apu::Channel::GetOscillatorClock(Cycle& rate,uint& fixed) const
1258 		{
1259 			apu.CalculateOscillatorClock( rate, fixed );
1260 		}
1261 
GetVolume(uint channel) const1262 		uint Apu::Channel::GetVolume(uint channel) const
1263 		{
1264 			NST_ASSERT( channel < MAX_CHANNELS );
1265 			return apu.settings.volumes[channel];
1266 		}
1267 
1268 		#ifdef NST_MSVC_OPTIMIZE
1269 		#pragma optimize("", on)
1270 		#endif
1271 
GetCpuClockBase() const1272 		Cycle Apu::Channel::GetCpuClockBase() const
1273 		{
1274 			return apu.cpu.GetClockBase();
1275 		}
1276 
GetCpuClockDivider() const1277 		uint Apu::Channel::GetCpuClockDivider() const
1278 		{
1279 			return apu.cpu.GetClockDivider();
1280 		}
1281 
GetCpuClock(uint clock) const1282 		Cycle Apu::Channel::GetCpuClock(uint clock) const
1283 		{
1284 			return apu.cpu.GetClock(clock);
1285 		}
1286 
GetSampleRate() const1287 		dword Apu::Channel::GetSampleRate() const
1288 		{
1289 			return apu.settings.rate;
1290 		}
1291 
IsMuted() const1292 		bool Apu::Channel::IsMuted() const
1293 		{
1294 			return apu.settings.muted;
1295 		}
1296 
Update() const1297 		void Apu::Channel::Update() const
1298 		{
1299 			apu.Update();
1300 		}
1301 
Clock(Cycle,Cycle,Cycle)1302 		Cycle Apu::Channel::Clock(Cycle,Cycle,Cycle)
1303 		{
1304 			return Cpu::CYCLE_MAX;
1305 		}
1306 
1307 		#ifdef NST_MSVC_OPTIMIZE
1308 		#pragma optimize("s", on)
1309 		#endif
1310 
Oscillator()1311 		Apu::Oscillator::Oscillator()
1312 		: rate(1), fixed(1) {}
1313 
Reset()1314 		void Apu::Oscillator::Reset()
1315 		{
1316 			active = false;
1317 			timer = RESET_CYCLES * fixed;
1318 			frequency = fixed;
1319 			amp = 0;
1320 		}
1321 
ClearAmp()1322 		inline void Apu::Oscillator::ClearAmp()
1323 		{
1324 			amp = 0;
1325 		}
1326 
UpdateSettings(dword r,uint f)1327 		void Apu::Oscillator::UpdateSettings(dword r,uint f)
1328 		{
1329 			NST_ASSERT( r && f );
1330 
1331 			frequency = frequency / fixed * f;
1332 			timer = timer / fixed * f;
1333 			fixed = f;
1334 			rate = r;
1335 		}
1336 
Reset()1337 		void Apu::Square::Reset()
1338 		{
1339 			Oscillator::Reset();
1340 
1341 			frequency = fixed * 2;
1342 			step = 0;
1343 			duty = 0;
1344 
1345 			envelope.Reset();
1346 			lengthCounter.Reset();
1347 
1348 			validFrequency = false;
1349 
1350 			sweepRate = 0;
1351 			sweepCount = 1;
1352 			sweepReload = false;
1353 			sweepIncrease = ~0U;
1354 			sweepShift = 0;
1355 
1356 			waveLength = 0;
1357 		}
1358 
1359 		#ifdef NST_MSVC_OPTIMIZE
1360 		#pragma optimize("", on)
1361 		#endif
1362 
CanOutput() const1363 		inline bool Apu::Square::CanOutput() const
1364 		{
1365 			return lengthCounter.GetCount() && envelope.Volume() && validFrequency;
1366 		}
1367 
1368 		#ifdef NST_MSVC_OPTIMIZE
1369 		#pragma optimize("s", on)
1370 		#endif
1371 
UpdateSettings(uint v,dword r,uint f)1372 		void Apu::Square::UpdateSettings(uint v,dword r,uint f)
1373 		{
1374 			Oscillator::UpdateSettings( r, f );
1375 			envelope.SetOutputVolume( (v * Channel::OUTPUT_MUL + Channel::DEFAULT_VOLUME/2) / Channel::DEFAULT_VOLUME );
1376 			active = CanOutput();
1377 		}
1378 
SaveState(State::Saver & state,const dword chunk) const1379 		void Apu::Square::SaveState(State::Saver& state,const dword chunk) const
1380 		{
1381 			state.Begin( chunk );
1382 
1383 			{
1384 				byte data[4];
1385 
1386 				data[0] = waveLength & 0xFFU;
1387 				data[1] = (waveLength >> 8) | (duty ? duty << (2+3) : 2U << 3); // for version compatibility
1388 				data[2] = (sweepCount - 1U) << 4;
1389 
1390 				if (sweepRate)
1391 					data[2] |= 0x08U | (sweepRate - 1);
1392 
1393 				if (sweepReload)
1394 					data[2] |= 0x80U;
1395 
1396 				data[3] = sweepShift;
1397 
1398 				if (!sweepIncrease)
1399 					data[3] |= 0x08U;
1400 
1401 				state.Begin( AsciiId<'R','E','G'>::V ).Write( data ).End();
1402 			}
1403 
1404 			lengthCounter.SaveState( state, AsciiId<'L','E','N'>::V );
1405 			envelope.SaveState( state, AsciiId<'E','N','V'>::V );
1406 
1407 			{
1408 				const byte data[13] =
1409 				{
1410 					step,
1411 					timer & 0xFFU,
1412 					timer >> 8,
1413 					timer >> 16,
1414 					timer >> 24,
1415 					frequency & 0xFFU,
1416 					frequency >> 8,
1417 					frequency >> 16,
1418 					frequency >> 24,
1419 					amp & 0xFFU,
1420 					amp >> 8,
1421 					amp >> 16,
1422 					amp >> 24,
1423 				};
1424 
1425 				state.Begin( AsciiId<'S','0','0'>::V ).Write( data ).End();
1426 			}
1427 
1428 			state.End();
1429 		}
1430 
// Restores the square channel from the sub-chunks written by SaveState().
// Chunks are processed in the order they appear in the stream, and later
// chunks build on earlier ones: 'ENV' calls UpdateFrequency(), which uses the
// wave length and length counter loaded so far, and 'S00' then overrides
// step, timer, frequency and amp.
void Apu::Square::LoadState(State::Loader& state)
{
	while (const dword chunk = state.Begin())
	{
		switch (chunk)
		{
			case AsciiId<'R','E','G'>::V:
			{
				State::Loader::Data<4> data( state );

				// 11-bit wave length: low byte plus three bits from byte 1
				waveLength = data[0] | (data[1] << 8 & 0x0700);

				// for version compatibility
				// (bits 3-6 of byte 1 hold duty<<2; any other value means duty 0)
				switch (data[1] >> 3 & 0xF)
				{
					case 4:  duty = 1; break;
					case 8:  duty = 2; break;
					case 12: duty = 3; break;
					default: duty = 0; break;
				}

				// bit 3 flags an active sweep; bits 0-2 then hold sweepRate-1
				if (data[2] & 0x08)
					sweepRate = (data[2] & 0x07) + 1;
				else
					sweepRate = 0;

				sweepCount = (data[2] >> 4 & 0x07) + 1;
				sweepReload = data[2] >> 7;
				sweepShift = data[3] & 0x07;
				// bit 3 of byte 3 selects the decreasing direction
				sweepIncrease = (data[3] & 0x08) ? 0U : ~0U;

				// defaults, replaced if an 'S00' chunk follows
				step = 0;
				timer = 0;
				break;
			}

			case AsciiId<'L','E','N'>::V:

				lengthCounter.LoadState( state );
				break;

			case AsciiId<'E','N','V'>::V:

				envelope.LoadState( state );

				// recomputes frequency, validFrequency and active
				UpdateFrequency();
				break;

			case AsciiId<'S','0','0'>::V:
			{
				State::Loader::Data<13> data( state );

				// step, then three little-endian 32-bit values
				step = data[0];
				timer = data[1] | (data[2] << 8) | (data[3] << 16) | (data[4] << 24);
				frequency = data[5] | (data[6] << 8) | (data[7] << 16) | (data[8] << 24);
				amp = data[9] | (data[10] << 8) | (data[11] << 16) | (data[12] << 24);
				break;
			}
		}

		state.End();
	}
}
1494 
1495 		#ifdef NST_MSVC_OPTIMIZE
1496 		#pragma optimize("", on)
1497 		#endif
1498 
Disable(const bool disable)1499 		NST_SINGLE_CALL void Apu::Square::Disable(const bool disable)
1500 		{
1501 			active &= lengthCounter.Disable( disable );
1502 		}
1503 
UpdateFrequency()1504 		void Apu::Square::UpdateFrequency()
1505 		{
1506 			if (waveLength >= MIN_FRQ && waveLength + (sweepIncrease & waveLength >> sweepShift) <= MAX_FRQ)
1507 			{
1508 				frequency = (waveLength + 1UL) * 2 * fixed;
1509 				validFrequency = true;
1510 				active = lengthCounter.GetCount() && envelope.Volume();
1511 			}
1512 			else
1513 			{
1514 				validFrequency = false;
1515 				active = false;
1516 			}
1517 		}
1518 
WriteReg0(const uint data)1519 		NST_SINGLE_CALL void Apu::Square::WriteReg0(const uint data)
1520 		{
1521 			envelope.Write( data );
1522 			duty = data >> REG0_DUTY_SHIFT;
1523 			active = CanOutput();
1524 		}
1525 
WriteReg1(const uint data)1526 		NST_SINGLE_CALL void Apu::Square::WriteReg1(const uint data)
1527 		{
1528 			sweepIncrease = (data & REG1_SWEEP_DECREASE) ? 0U : ~0U;
1529 			sweepShift = data & REG1_SWEEP_SHIFT;
1530 			sweepRate = 0;
1531 
1532 			if ((data & (REG1_SWEEP_ENABLED|REG1_SWEEP_SHIFT)) > REG1_SWEEP_ENABLED)
1533 			{
1534 				sweepRate = ((data & REG1_SWEEP_RATE) >> REG1_SWEEP_RATE_SHIFT) + 1;
1535 				sweepReload = true;
1536 			}
1537 
1538 			UpdateFrequency();
1539 		}
1540 
WriteReg2(const uint data)1541 		NST_SINGLE_CALL void Apu::Square::WriteReg2(const uint data)
1542 		{
1543 			waveLength = (waveLength & uint(REG3_WAVELENGTH_HIGH)) | (data & REG3_WAVELENGTH_LOW);
1544 
1545 			UpdateFrequency();
1546 		}
1547 
WriteReg3(const uint data,const Cycle frameCounterDelta)1548 		NST_SINGLE_CALL void Apu::Square::WriteReg3(const uint data,const Cycle frameCounterDelta)
1549 		{
1550 			step = 0;
1551 
1552 			envelope.ResetClock();
1553 			lengthCounter.Write( data, frameCounterDelta );
1554 
1555 			waveLength = (data << 8 & REG3_WAVELENGTH_HIGH) | (waveLength & uint(REG3_WAVELENGTH_LOW));
1556 
1557 			UpdateFrequency();
1558 		}
1559 
ClockEnvelope()1560 		NST_SINGLE_CALL void Apu::Square::ClockEnvelope()
1561 		{
1562 			envelope.Clock();
1563 			active = CanOutput();
1564 		}
1565 
// Clocks the length counter and the sweep unit.
//
// complement: value added to the wave length in the decreasing direction
//             before the shifted amount is subtracted. NOTE(review):
//             presumably differs between the two square channels - confirm
//             against the caller.
NST_SINGLE_CALL void Apu::Square::ClockSweep(const uint complement)
{
	// The length counter is only clocked while the envelope is not looping;
	// when Clock() returns non-zero the channel is silenced.
	if (!envelope.Looping() && lengthCounter.Clock())
		active = false;

	// The sweep divider runs only while sweeping is armed (sweepRate != 0).
	if (sweepRate && !--sweepCount)
	{
		sweepCount = sweepRate;

		if (waveLength >= MIN_FRQ)
		{
			const uint shifted = waveLength >> sweepShift;

			if (!sweepIncrease)
			{
				// decreasing direction; unsigned arithmetic, order matters
				waveLength += complement - shifted;
				UpdateFrequency();
			}
			else if (waveLength + shifted <= MAX_FRQ)
			{
				// increasing direction, applied only while the target stays in range
				waveLength += shifted;
				UpdateFrequency();
			}
		}
	}

	// A pending reload restarts the divider after it has been clocked above.
	// sweepRate may be 0 here, which leaves sweepCount at 0.
	if (sweepReload)
	{
		sweepReload = false;
		sweepCount = sweepRate;
	}
}
1598 
GetLengthCounter() const1599 		inline uint Apu::Square::GetLengthCounter() const
1600 		{
1601 			return lengthCounter.GetCount();
1602 		}
1603 
// Produces the next output sample of the square channel and advances the
// timer by one sample period (rate).
//
// While active, the result is the envelope volume gated by the duty waveform;
// if one or more waveform steps elapse within the sample period, the result
// is the time-weighted average over that period. While inactive, the
// sequencer position is still advanced and the last amplitude decays by
// Channel::OUTPUT_DECAY per sample until it reaches zero.
dword Apu::Square::GetSample()
{
	NST_VERIFY( bool(active) == CanOutput() && timer >= 0 );

	// time left in the current waveform step before this sample period
	dword sum = timer;
	timer -= idword(rate);

	if (active)
	{
		// Per-step right-shift amounts, one row per duty setting:
		// 0x00 passes the value through, 0x1F shifts it out (effectively zero).
		static const byte forms[4][8] =
		{
			{0x1F,0x00,0x1F,0x1F,0x1F,0x1F,0x1F,0x1F},
			{0x1F,0x00,0x00,0x1F,0x1F,0x1F,0x1F,0x1F},
			{0x1F,0x00,0x00,0x00,0x00,0x1F,0x1F,0x1F},
			{0x00,0x1F,0x1F,0x00,0x00,0x00,0x00,0x00}
		};

		const byte* const NST_RESTRICT form = forms[duty];

		if (timer >= 0)
		{
			// the whole sample period lies within the current step
			amp = envelope.Volume() >> form[step];
		}
		else
		{
			// contribution of the current step
			sum >>= form[step];

			// Walk the steps crossed during this period. Each contributes the
			// time it occupies (capped at one full step) gated by its shift.
			do
			{
				sum += NST_MIN(-timer,frequency) >> form[step = (step + 1) & 0x7];
				timer += idword(frequency);
			}
			while (timer < 0);

			NST_VERIFY( !envelope.Volume() || sum <= 0xFFFFFFFF / envelope.Volume() + rate/2 );
			// scale by the volume and divide by the period, rounding to nearest
			amp = (sum * envelope.Volume() + rate/2) / rate;
		}
	}
	else
	{
		if (timer < 0)
		{
			// number of whole steps needed to bring the timer back to >= 0
			const uint count = (-timer + frequency - 1) / frequency;
			step = (step + count) & 0x7;
			timer += idword(count * frequency);
		}

		// let the previous amplitude decay instead of dropping to zero at once
		if (amp < Channel::OUTPUT_DECAY)
		{
			return 0;
		}
		else
		{
			amp -= Channel::OUTPUT_DECAY;
		}
	}

	return amp;
}
1663 
1664 		#ifdef NST_MSVC_OPTIMIZE
1665 		#pragma optimize("s", on)
1666 		#endif
1667 
Triangle()1668 		Apu::Triangle::Triangle()
1669 		: outputVolume(0) {}
1670 
Reset()1671 		void Apu::Triangle::Reset()
1672 		{
1673 			Oscillator::Reset();
1674 
1675 			step = 0x7;
1676 			status = STATUS_COUNTING;
1677 			waveLength = 0;
1678 			//linearCtrl = 0;
1679 			linearCounter = 0;
1680 
1681 			lengthCounter.Reset();
1682 		}
1683 
1684 		#ifdef NST_MSVC_OPTIMIZE
1685 		#pragma optimize("", on)
1686 		#endif
1687 
CanOutput() const1688 		inline bool Apu::Triangle::CanOutput() const
1689 		{
1690 			return lengthCounter.GetCount() && linearCounter && waveLength >= MIN_FRQ && outputVolume;
1691 		}
1692 
1693 		#ifdef NST_MSVC_OPTIMIZE
1694 		#pragma optimize("s", on)
1695 		#endif
1696 
UpdateSettings(uint v,dword r,uint f)1697 		void Apu::Triangle::UpdateSettings(uint v,dword r,uint f)
1698 		{
1699 			Oscillator::UpdateSettings( r, f );
1700 
1701 			outputVolume = (v * Channel::OUTPUT_MUL + Channel::DEFAULT_VOLUME/2) / Channel::DEFAULT_VOLUME;
1702 			active = CanOutput();
1703 		}
1704 
SaveState(State::Saver & state,const dword chunk) const1705 		void Apu::Triangle::SaveState(State::Saver& state,const dword chunk) const
1706 		{
1707 			state.Begin( chunk );
1708 
1709 			{
1710 				const byte data[4] =
1711 				{
1712 					waveLength & 0xFFU,
1713 					waveLength >> 8,
1714 					linearCounter | (uint(status) << 7),
1715 					linearCtrl
1716 				};
1717 
1718 				state.Begin( AsciiId<'R','E','G'>::V ).Write( data ).End();
1719 			}
1720 
1721 			lengthCounter.SaveState( state, AsciiId<'L','E','N'>::V );
1722 
1723 			{
1724 				const byte data[9] =
1725 				{
1726 					step,
1727 					timer & 0xFFU,
1728 					timer >> 8,
1729 					timer >> 16,
1730 					timer >> 24,
1731 					amp & 0xFFU,
1732 					amp >> 8,
1733 					amp >> 16,
1734 					amp >> 24,
1735 				};
1736 
1737 				state.Begin( AsciiId<'S','0','0'>::V ).Write( data ).End();
1738 			}
1739 
1740 			state.End();
1741 		}
1742 
LoadState(State::Loader & state)1743 		void Apu::Triangle::LoadState(State::Loader& state)
1744 		{
1745 			while (const dword chunk = state.Begin())
1746 			{
1747 				switch (chunk)
1748 				{
1749 					case AsciiId<'R','E','G'>::V:
1750 					{
1751 						State::Loader::Data<4> data( state );
1752 
1753 						waveLength = data[0] | (data[1] << 8 & 0x0700);
1754 						linearCounter = data[2] & 0x7F;
1755 						status = static_cast<Status>(data[2] >> 7);
1756 						linearCtrl = data[3];
1757 
1758 						frequency = (waveLength + 1UL) * fixed;
1759 						timer = 0;
1760 						step = 0;
1761 						break;
1762 					}
1763 
1764 					case AsciiId<'L','E','N'>::V:
1765 
1766 						lengthCounter.LoadState( state );
1767 						break;
1768 
1769 					case AsciiId<'S','0','0'>::V:
1770 					{
1771 						State::Loader::Data<9> data( state );
1772 
1773 						step = data[0];
1774 						timer = data[1] | (data[2] << 8) | (data[3] << 16) | (data[4] << 24);
1775 						amp = data[5] | (data[6] << 8) | (data[7] << 16) | (data[8] << 24);
1776 						break;
1777 					}
1778 				}
1779 
1780 				state.End();
1781 			}
1782 
1783 			active = CanOutput();
1784 		}
1785 
1786 		#ifdef NST_MSVC_OPTIMIZE
1787 		#pragma optimize("", on)
1788 		#endif
1789 
Disable(const bool disable)1790 		NST_SINGLE_CALL void Apu::Triangle::Disable(const bool disable)
1791 		{
1792 			active &= lengthCounter.Disable( disable );
1793 		}
1794 
WriteReg0(const uint data)1795 		NST_SINGLE_CALL void Apu::Triangle::WriteReg0(const uint data)
1796 		{
1797 			linearCtrl = data;
1798 		}
1799 
WriteReg2(const uint data)1800 		NST_SINGLE_CALL void Apu::Triangle::WriteReg2(const uint data)
1801 		{
1802 			waveLength = (waveLength & uint(REG3_WAVE_LENGTH_HIGH)) | (data & REG2_WAVE_LENGTH_LOW);
1803 			frequency = (waveLength + 1UL) * fixed;
1804 
1805 			active = CanOutput();
1806 		}
1807 
WriteReg3(const uint data,const Cycle frameCounterDelta)1808 		NST_SINGLE_CALL void Apu::Triangle::WriteReg3(const uint data,const Cycle frameCounterDelta)
1809 		{
1810 			waveLength = (data << 8 & REG3_WAVE_LENGTH_HIGH) | (waveLength & uint(REG2_WAVE_LENGTH_LOW));
1811 			frequency = (waveLength + 1UL) * fixed;
1812 
1813 			status = STATUS_RELOAD;
1814 			lengthCounter.Write( data, frameCounterDelta );
1815 
1816 			active = CanOutput();
1817 		}
1818 
ClockLinearCounter()1819 		NST_SINGLE_CALL void Apu::Triangle::ClockLinearCounter()
1820 		{
1821 			if (status == STATUS_COUNTING)
1822 			{
1823 				if (linearCounter && !--linearCounter)
1824 					active = false;
1825 			}
1826 			else
1827 			{
1828 				if (!(linearCtrl & uint(REG0_LINEAR_COUNTER_START)))
1829 					status = STATUS_COUNTING;
1830 
1831 				linearCounter = linearCtrl & uint(REG0_LINEAR_COUNTER_LOAD);
1832 				active = CanOutput();
1833 			}
1834 		}
1835 
ClockLengthCounter()1836 		NST_SINGLE_CALL void Apu::Triangle::ClockLengthCounter()
1837 		{
1838 			if (!(linearCtrl & uint(REG0_LINEAR_COUNTER_START)) && lengthCounter.Clock())
1839 				active = false;
1840 		}
1841 
// Produces the next output sample of the triangle channel.
//
// While active, the timer advances by one sample period (rate) and the result
// is the current waveform level scaled by outputVolume * 3; if one or more
// steps elapse within the period, the result is the time-weighted average.
// While inactive, nothing is advanced and the last amplitude is returned
// unchanged (the decay path below is commented out).
NST_SINGLE_CALL dword Apu::Triangle::GetSample()
{
	NST_VERIFY( bool(active) == CanOutput() && timer >= 0 );

	if (active)
	{
		// one full period of the waveform: ramps 0..15 up, then 15..0 down
		static const byte pyramid[32] =
		{
			0x0,0x1,0x2,0x3,0x4,0x5,0x6,0x7,
			0x8,0x9,0xA,0xB,0xC,0xD,0xE,0xF,
			0xF,0xE,0xD,0xC,0xB,0xA,0x9,0x8,
			0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0
		};

		// time left in the current step before this sample period
		dword sum = timer;
		timer -= idword(rate);

		if (timer >= 0)
		{
			// the whole sample period lies within the current step
			amp = pyramid[step] * outputVolume * 3;
		}
		else
		{
			// contribution of the current step
			sum *= pyramid[step];

			// Walk the steps crossed during this period. Each contributes its
			// level weighted by the time it occupies (capped at one full step).
			do
			{
				sum += NST_MIN(-timer,frequency) * pyramid[step = (step + 1) & 0x1F];
				timer += idword(frequency);
			}
			while (timer < 0);

			NST_VERIFY( !outputVolume || sum <= 0xFFFFFFFF / outputVolume + rate/2 );
			// scale by the volume, divide by the period (rounding to nearest), then triple
			amp = (sum * outputVolume + rate/2) / rate * 3;
		}
	}
	/*else if (amp < Channel::OUTPUT_DECAY)
	{
		return 0;
	}
	else
	{
		amp -= Channel::OUTPUT_DECAY;
		step &= STEP_CHECK;
	}*/

	return amp;
}
1890 
GetLengthCounter() const1891 		inline uint Apu::Triangle::GetLengthCounter() const
1892 		{
1893 			return lengthCounter.GetCount();
1894 		}
1895 
1896 		#ifdef NST_MSVC_OPTIMIZE
1897 		#pragma optimize("s", on)
1898 		#endif
1899 
Reset(const CpuModel model)1900 		void Apu::Noise::Reset(const CpuModel model)
1901 		{
1902 			Oscillator::Reset();
1903 
1904 			frequency = lut[model][0] * dword(fixed);
1905 
1906 			bits = 1;
1907 			shifter = 13;
1908 
1909 			envelope.Reset();
1910 			lengthCounter.Reset();
1911 		}
1912 
GetFrequencyIndex() const1913 		uint Apu::Noise::GetFrequencyIndex() const
1914 		{
1915 			for (uint v=frequency/fixed, i=0; i < 16; ++i)
1916 			{
1917 				if (v == lut[0][i] || v == lut[1][i])
1918 					return i;
1919 			}
1920 
1921 			return 0;
1922 		}
1923 
1924 		#ifdef NST_MSVC_OPTIMIZE
1925 		#pragma optimize("", on)
1926 		#endif
1927 
CanOutput() const1928 		inline bool Apu::Noise::CanOutput() const
1929 		{
1930 			return lengthCounter.GetCount() && envelope.Volume();
1931 		}
1932 
1933 		#ifdef NST_MSVC_OPTIMIZE
1934 		#pragma optimize("s", on)
1935 		#endif
1936 
UpdateSettings(uint v,dword r,uint f)1937 		void Apu::Noise::UpdateSettings(uint v,dword r,uint f)
1938 		{
1939 			Oscillator::UpdateSettings( r, f );
1940 			envelope.SetOutputVolume( (v * Channel::OUTPUT_MUL + Channel::DEFAULT_VOLUME/2) / Channel::DEFAULT_VOLUME );
1941 			active = CanOutput();
1942 		}
1943 
SaveState(State::Saver & state,const dword chunk) const1944 		void Apu::Noise::SaveState(State::Saver& state,const dword chunk) const
1945 		{
1946 			state.Begin( chunk );
1947 
1948 			state.Begin( AsciiId<'R','E','G'>::V ).Write8( (shifter == 8 ? 0x10 : 0x00) | GetFrequencyIndex() ).End();
1949 			lengthCounter.SaveState( state, AsciiId<'L','E','N'>::V );
1950 			envelope.SaveState( state, AsciiId<'E','N','V'>::V );
1951 
1952 			{
1953 				const byte data[6] =
1954 				{
1955 					bits & 0xFFU,
1956 					bits >> 8,
1957 					timer & 0xFFU,
1958 					timer >> 8,
1959 					timer >> 16,
1960 					timer >> 24
1961 				};
1962 
1963 				state.Begin( AsciiId<'S','0','0'>::V ).Write( data ).End();
1964 			}
1965 
1966 			state.End();
1967 		}
1968 
LoadState(State::Loader & state,const CpuModel model)1969 		void Apu::Noise::LoadState(State::Loader& state,const CpuModel model)
1970 		{
1971 			while (const dword chunk = state.Begin())
1972 			{
1973 				switch (chunk)
1974 				{
1975 					case AsciiId<'R','E','G'>::V:
1976 					{
1977 						const uint data = state.Read8();
1978 
1979 						frequency = lut[model][data & 0x0F] * dword(fixed);
1980 						shifter = (data & 0x10) ? 8 : 13;
1981 
1982 						timer = 0;
1983 						bits = 1;
1984 						break;
1985 					}
1986 
1987 					case AsciiId<'L','E','N'>::V:
1988 
1989 						lengthCounter.LoadState( state );
1990 						break;
1991 
1992 					case AsciiId<'E','N','V'>::V:
1993 
1994 						envelope.LoadState( state );
1995 						break;
1996 
1997 					case AsciiId<'S','0','0'>::V:
1998 					{
1999 						State::Loader::Data<6> data( state );
2000 
2001 						bits = data[0] | (data[1] << 8);
2002 						timer = data[2] | (data[3] << 8) | (data[4] << 16) | (data[5] << 24);
2003 						break;
2004 					}
2005 				}
2006 
2007 				state.End();
2008 			}
2009 
2010 			active = CanOutput();
2011 		}
2012 
2013 		#ifdef NST_MSVC_OPTIMIZE
2014 		#pragma optimize("", on)
2015 		#endif
2016 
Disable(const bool disable)2017 		NST_SINGLE_CALL void Apu::Noise::Disable(const bool disable)
2018 		{
2019 			active &= lengthCounter.Disable( disable );
2020 		}
2021 
WriteReg0(const uint data)2022 		NST_SINGLE_CALL void Apu::Noise::WriteReg0(const uint data)
2023 		{
2024 			envelope.Write( data );
2025 			active = CanOutput();
2026 		}
2027 
WriteReg2(const uint data,const CpuModel model)2028 		NST_SINGLE_CALL void Apu::Noise::WriteReg2(const uint data,const CpuModel model)
2029 		{
2030 			frequency = lut[model][data & REG2_FREQUENCY] * dword(fixed);
2031 			shifter = (data & REG2_93BIT_MODE) ? 8 : 13;
2032 		}
2033 
WriteReg3(const uint data,const Cycle frameCounterDelta)2034 		NST_SINGLE_CALL void Apu::Noise::WriteReg3(const uint data,const Cycle frameCounterDelta)
2035 		{
2036 			envelope.ResetClock();
2037 			lengthCounter.Write( data, frameCounterDelta );
2038 
2039 			active = CanOutput();
2040 		}
2041 
ClockEnvelope()2042 		NST_SINGLE_CALL void Apu::Noise::ClockEnvelope()
2043 		{
2044 			envelope.Clock();
2045 			active = CanOutput();
2046 		}
2047 
ClockLengthCounter()2048 		NST_SINGLE_CALL void Apu::Noise::ClockLengthCounter()
2049 		{
2050 			if (!envelope.Looping() && lengthCounter.Clock())
2051 				active = false;
2052 		}
2053 
// Produces the next output sample of the noise channel and advances the timer
// by one sample period (rate).
//
// The channel outputs while bit 14 of the shift register is clear. If the
// register is stepped one or more times within the sample period, the result
// is the time-weighted average of the output over that period. While the
// channel is inactive the register keeps stepping but the result is 0.
NST_SINGLE_CALL dword Apu::Noise::GetSample()
{
	NST_VERIFY( bool(active) == CanOutput() && timer >= 0 );

	// time left before the next shift, measured before this sample period
	dword sum = timer;
	timer -= idword(rate);

	if (active)
	{
		if (timer >= 0)
		{
			// no shift during this period: full volume or silence (falls through to 0)
			if (!(bits & 0x4000))
				return envelope.Volume() * 2;
		}
		else
		{
			// the time spent in the current state only counts if it was audible
			if (bits & 0x4000)
				sum = 0;

			do
			{
				// shift left, feeding back bit 14 XOR the bit selected by shifter
				bits = (bits << 1) | ((bits >> 14 ^ bits >> shifter) & 0x1);

				// accumulate the time spent in audible states, capped at one full step
				if (!(bits & 0x4000))
					sum += NST_MIN(-timer,frequency);

				timer += idword(frequency);
			}
			while (timer < 0);

			NST_VERIFY( !envelope.Volume() || sum <= 0xFFFFFFFF / envelope.Volume() + rate/2 );
			// scale by the volume, divide by the period (rounding to nearest), then double
			return (sum * envelope.Volume() + rate/2) / rate * 2;
		}
	}
	else while (timer < 0)
	{
		// inactive: keep the shift register and timer running
		bits = (bits << 1) | ((bits >> 14 ^ bits >> shifter) & 0x1);
		timer += idword(frequency);
	}

	return 0;
}
2096 
GetLengthCounter() const2097 		inline uint Apu::Noise::GetLengthCounter() const
2098 		{
2099 			return lengthCounter.GetCount();
2100 		}
2101 
2102 		#ifdef NST_MSVC_OPTIMIZE
2103 		#pragma optimize("s", on)
2104 		#endif
2105 
Dmc()2106 		Apu::Dmc::Dmc()
2107 		: outputVolume(0)
2108 		{
2109 			frequency = GetResetFrequency( CPU_RP2A03 );
2110 		}
2111 
Reset(const CpuModel model)2112 		void Apu::Dmc::Reset(const CpuModel model)
2113 		{
2114 			curSample          = 0;
2115 			linSample          = 0;
2116 			frequency          = GetResetFrequency( model );
2117 			regs.ctrl          = 0;
2118 			regs.lengthCounter = 1;
2119 			regs.address       = 0xC000;
2120 			out.active         = false;
2121 			out.shifter        = 0;
2122 			out.dac            = 0;
2123 			out.buffer         = 0x00;
2124 			dma.lengthCounter  = 0;
2125 			dma.buffered       = false;
2126 			dma.address        = 0xC000;
2127 			dma.buffer         = 0x00;
2128 		}
2129 
GetResetFrequency(CpuModel model)2130 		Cycle Apu::Dmc::GetResetFrequency(CpuModel model)
2131 		{
2132 			return lut[model][0];
2133 		}
2134 
UpdateSettings(uint v)2135 		void Apu::Dmc::UpdateSettings(uint v)
2136 		{
2137 			v = (v * Channel::OUTPUT_MUL + Channel::DEFAULT_VOLUME/2) / Channel::DEFAULT_VOLUME;
2138 
2139 			if (outputVolume)
2140 				linSample /= outputVolume;
2141 
2142 			if (outputVolume)
2143 				curSample /= outputVolume;
2144 
2145 			linSample *= v;
2146 			curSample *= v;
2147 			outputVolume = v;
2148 
2149 			if (!v)
2150 				out.active = false;
2151 		}
2152 
ClearAmp()2153 		inline void Apu::Dmc::ClearAmp()
2154 		{
2155 			curSample = 0;
2156 			linSample = 0;
2157 		}
2158 
SaveState(State::Saver & state,const dword chunk,const Cpu & cpu,const Cycle dmcMcClock) const2159 		void Apu::Dmc::SaveState(State::Saver& state,const dword chunk,const Cpu& cpu,const Cycle dmcMcClock) const
2160 		{
2161 			NST_VERIFY( dmcMcClock >= cpu.GetCycles() );
2162 
2163 			dword dmcClock = dmcMcClock;
2164 
2165 			if (dmcClock > cpu.GetCycles())
2166 				dmcClock = (dmcClock - cpu.GetCycles()) / cpu.GetClock();
2167 			else
2168 				dmcClock = 0;
2169 
2170 			NST_VERIFY( dmcClock <= 0x1FFF && dmcMcClock == cpu.GetCycles() + dmcClock * cpu.GetClock() );
2171 
2172 			state.Begin( chunk );
2173 
2174 			{
2175 				const byte data[12] =
2176 				{
2177 					dmcClock & 0xFF,
2178 					dmcClock >> 8,
2179 					(
2180 						( ( regs.ctrl & REG0_FREQUENCY  )              ) |
2181 						( ( regs.ctrl & REG0_LOOP       ) ? 0x10U : 0U ) |
2182 						( ( regs.ctrl & REG0_IRQ_ENABLE ) ? 0x20U : 0U ) |
2183 						( ( dma.lengthCounter           ) ? 0x40U : 0U )
2184 					),
2185 					(regs.address - 0xC000U) >> 6,
2186 					(regs.lengthCounter - 1U) >> 4,
2187 					(dma.address >> 0 & 0xFFU),
2188 					(dma.address >> 8 & 0x7FU) | (dma.buffered ? 0x80 : 0x00),
2189 					dma.lengthCounter ? (dma.lengthCounter - 1U) >> 4 : 0,
2190 					dma.buffer,
2191 					7 - out.shifter,
2192 					out.buffer,
2193 					out.dac,
2194 				};
2195 
2196 				state.Begin( AsciiId<'R','E','G'>::V ).Write( data ).End();
2197 			}
2198 
2199 			{
2200 				const byte data[4] =
2201 				{
2202 					linSample & 0xFFU,
2203 					linSample >> 8,
2204 					dma.lengthCounter & 0xFFU,
2205 					dma.lengthCounter >> 8,
2206 				};
2207 
2208 				state.Begin( AsciiId<'S','0','0'>::V ).Write( data ).End();
2209 			}
2210 
2211 			state.End();
2212 		}
2213 
// Restores the DMC channel from the sub-chunks written by SaveState().
// Chunks are processed in stream order: 'REG' rebuilds the registers, the DMA
// and output units and derives the sample values; a following 'S00' chunk
// then replaces linSample and dma.lengthCounter with the stored values.
//
// dmcClock receives the absolute clock of the next DMC event, computed from
// the current CPU cycle and the stored number of CPU clocks.
void Apu::Dmc::LoadState(State::Loader& state,const Cpu& cpu,const CpuModel model,Cycle& dmcClock)
{
	while (const dword chunk = state.Begin())
	{
		switch (chunk)
		{
			case AsciiId<'R','E','G'>::V:
			{
				State::Loader::Data<12> data( state );

				// bytes 0-1: CPU clocks from now, little-endian
				dmcClock = cpu.GetCycles() + ((data[0] | data[1] << 8) * cpu.GetClock());

				// byte 2: frequency bits plus 0x10 loop and 0x20 IRQ enable
				regs.ctrl =
				(
					( ( data[2] & 0x10 ) ? REG0_LOOP       : 0U ) |
					( ( data[2] & 0x20 ) ? REG0_IRQ_ENABLE : 0U ) |
					( ( data[2] & REG0_FREQUENCY )              )
				);

				frequency          = lut[model][regs.ctrl & REG0_FREQUENCY];
				regs.address       = 0xC000 | (data[3] << 6);
				regs.lengthCounter = (data[4] << 4) + 1;
				dma.address        = 0x8000 | data[5] | (data[6] << 8 & 0x7F00);
				dma.buffered       = data[6] >> 7;
				// 0x40 in byte 2 marks a non-zero DMA length counter
				dma.lengthCounter  = (data[2] & 0x40) ? (data[7] << 4) + 1 : 0;
				dma.buffer         = data[8];
				// stored as 7 - shifter
				out.shifter        = 7 - (data[9] & 0x7);
				out.buffer         = data[10];
				out.dac            = data[11] & 0x7F;

				// sample values are kept premultiplied by the output volume
				curSample = out.dac * outputVolume;
				linSample = curSample;
				out.active = dma.buffered && outputVolume;
				break;
			}

			case AsciiId<'S','0','0'>::V:
			{
				State::Loader::Data<4> data( state );

				// two little-endian 16-bit values
				linSample = data[0] | (data[1] << 8);
				dma.lengthCounter = data[2] | (data[3] << 8);
				break;
			}
		}

		state.End();
	}
}
2263 
2264 		#ifdef NST_MSVC_OPTIMIZE
2265 		#pragma optimize("", on)
2266 		#endif
2267 
Disable(const bool disable,Cpu & cpu)2268 		NST_SINGLE_CALL void Apu::Dmc::Disable(const bool disable,Cpu& cpu)
2269 		{
2270 			cpu.ClearIRQ( Cpu::IRQ_DMC );
2271 
2272 			if (disable)
2273 			{
2274 				dma.lengthCounter = 0;
2275 			}
2276 			else if (!dma.lengthCounter)
2277 			{
2278 				dma.lengthCounter = regs.lengthCounter;
2279 				dma.address = regs.address;
2280 
2281 				if (!dma.buffered)
2282 					DoDMA( cpu, cpu.GetCycles() );
2283 			}
2284 		}
2285 
// Returns the next output sample of the DMC channel. linSample follows
// curSample in steps of at most outputVolume * INP_STEP per call instead of
// jumping to it directly.
NST_SINGLE_CALL dword Apu::Dmc::GetSample()
{
	if (curSample != linSample)
	{
		const uint step = outputVolume * INP_STEP;

		// Tests |curSample - linSample| <= step without a signed subtraction.
		// NOTE(review): relies on unsigned wrap-around, so it assumes both
		// sample members are unsigned - confirm against the declaration.
		if (curSample + step - linSample <= step*2)
		{
			// close enough: snap to the target
			linSample = curSample;
		}
		else if (curSample > linSample)
		{
			linSample += step;
		}
		else
		{
			linSample -= step;
		}
	}

	return linSample;
}
2308 
// Fetches the next sample byte into the DMA buffer, stealing CPU cycles for
// the transfer, then advances the DMA address and length counter.
//
// cpu:         CPU to read from and steal cycles from
// clock:       clock at which the DMA takes place
// readAddress: when non-zero, the address of a CPU read that is in progress;
//              zero means no read is involved
//
// Must only be called while no byte is buffered and the DMA length counter
// is non-zero.
void Apu::Dmc::DoDMA(Cpu& cpu,const Cycle clock,const uint readAddress)
{
	NST_VERIFY( !dma.buffered && (!readAddress || !cpu.IsWriteCycle(clock)) );

	if (!readAddress)
	{
		// no read in progress: 2 clocks on a write cycle, otherwise 3
		cpu.StealCycles( cpu.GetClock(cpu.IsWriteCycle(clock) ? 2 : 3) );
	}
	else if (cpu.GetCycles() != clock)
	{
		// read in progress, but not at the current CPU cycle
		cpu.StealCycles( cpu.GetClock(3) );
	}
	else
	{
		NST_DEBUG_MSG("DMA/Read conflict!");

		cpu.StealCycles( cpu.GetClock(1) );

		/* According to dmc_dma_during_read4/dma_2007_read, DMC DMA during read causes
		 * 2-3 extra $2007 reads before the real read. The nesdev wiki states that this
		 * also happens when polling $2002 for vblank.
		*/
		// addresses in the $4xxx range are excluded from the two extra reads
		if ((readAddress & 0xF000) != 0x4000)
		{
			cpu.Peek( readAddress );
			cpu.Peek( readAddress );
		}

		cpu.StealCycles( cpu.GetClock(1) );
		cpu.Peek( readAddress );
		cpu.StealCycles( cpu.GetClock(1) );
	}

	// the actual sample fetch, costing one more clock
	dma.buffer = cpu.Peek( dma.address );
	cpu.StealCycles( cpu.GetClock() );
	// advance within $8000-$FFFF, wrapping from $FFFF back to $8000
	dma.address = 0x8000 | ((dma.address + 1U) & 0x7FFF);
	dma.buffered = true;

	NST_VERIFY( dma.lengthCounter );

	if (!--dma.lengthCounter)
	{
		// end of the sample: restart it when looping, otherwise raise the
		// IRQ if it is enabled
		if (regs.ctrl & REG0_LOOP)
		{
			dma.address = regs.address;
			dma.lengthCounter = regs.lengthCounter;
		}
		else if (regs.ctrl & REG0_IRQ_ENABLE)
		{
			cpu.DoIRQ( Cpu::IRQ_DMC );
		}
	}
}
2362 
WriteReg0(const uint data,const CpuModel model)2363 		NST_SINGLE_CALL bool Apu::Dmc::WriteReg0(const uint data,const CpuModel model)
2364 		{
2365 			regs.ctrl = data;
2366 			frequency = lut[model][data & REG0_FREQUENCY];
2367 			return data & REG0_IRQ_ENABLE;
2368 		}
2369 
WriteReg1(const uint data)2370 		NST_SINGLE_CALL void Apu::Dmc::WriteReg1(const uint data)
2371 		{
2372 			out.dac = data & 0x7F;
2373 			curSample = out.dac * outputVolume;
2374 		}
2375 
WriteReg2(const uint data)2376 		NST_SINGLE_CALL void Apu::Dmc::WriteReg2(const uint data)
2377 		{
2378 			regs.address = 0xC000 | (data << 6);
2379 		}
2380 
WriteReg3(const uint data)2381 		NST_SINGLE_CALL void Apu::Dmc::WriteReg3(const uint data)
2382 		{
2383 			regs.lengthCounter = (data << 4) + 1;
2384 		}
2385 
ClockDAC()2386 		NST_SINGLE_CALL bool Apu::Dmc::ClockDAC()
2387 		{
2388 			if (out.active)
2389 			{
2390 				const uint next = out.dac + ((out.buffer & 0x1U) << 2) - 2;
2391 				out.buffer >>= 1;
2392 
2393 				if (next <= 0x7F && next != out.dac)
2394 				{
2395 					out.dac = next;
2396 					return true;
2397 				}
2398 			}
2399 
2400 			return false;
2401 		}
2402 
Update()2403 		NST_SINGLE_CALL void Apu::Dmc::Update()
2404 		{
2405 			curSample = out.dac * outputVolume;
2406 		}
2407 
ClockDMA(Cpu & cpu,Cycle & clock,const uint readAddress)2408 		NST_SINGLE_CALL void Apu::Dmc::ClockDMA(Cpu& cpu,Cycle& clock,const uint readAddress)
2409 		{
2410 			const Cycle tmp = clock;
2411 			clock += frequency;
2412 
2413 			if (out.shifter)
2414 			{
2415 				out.shifter--;
2416 			}
2417 			else
2418 			{
2419 				out.shifter = 7;
2420 				out.active = dma.buffered;
2421 
2422 				if (out.active)
2423 				{
2424 					out.active = outputVolume;
2425 					dma.buffered = false;
2426 					out.buffer = dma.buffer;
2427 
2428 					if (dma.lengthCounter)
2429 						DoDMA( cpu, tmp, readAddress );
2430 				}
2431 			}
2432 		}
2433 
GetLengthCounter() const2434 		inline uint Apu::Dmc::GetLengthCounter() const
2435 		{
2436 			return dma.lengthCounter;
2437 		}
2438 
2439 		#ifdef NST_MSVC_OPTIMIZE
2440 		#pragma optimize("s", on)
2441 		#endif
2442 
ClearBuffers()2443 		void Apu::ClearBuffers()
2444 		{
2445 			ClearBuffers( true );
2446 		}
2447 
ClearBuffers(bool resync)2448 		NST_NO_INLINE void Apu::ClearBuffers(bool resync)
2449 		{
2450 			if (resync)
2451 				synchronizer.Resync( settings.speed, cpu );
2452 
2453 			square[0].ClearAmp();
2454 			square[1].ClearAmp();
2455 			triangle.ClearAmp();
2456 			noise.ClearAmp();
2457 			dmc.ClearAmp();
2458 
2459 			dcBlocker.Reset();
2460 
2461 			buffer.Reset( settings.bits, false );
2462 		}
2463 
2464 		#ifdef NST_MSVC_OPTIMIZE
2465 		#pragma optimize("", on)
2466 		#endif
2467 
Clock()2468 		Cycle Apu::Clock()
2469 		{
2470 			if (cycles.dmcClock <= cpu.GetCycles())
2471 				ClockDmc( cpu.GetCycles() );
2472 
2473 			if (cycles.frameIrqClock <= cpu.GetCycles())
2474 				ClockFrameIRQ( cpu.GetCycles() );
2475 
2476 			return NST_MIN(cycles.dmcClock,cycles.frameIrqClock);
2477 		}
2478 
ClockDMA(uint readAddress)2479 		void Apu::ClockDMA(uint readAddress)
2480 		{
2481 			if (cycles.dmcClock <= cpu.GetCycles())
2482 				ClockDmc( cpu.GetCycles(), readAddress );
2483 		}
2484 
ClockOscillators(const bool twoClocks)2485 		NST_NO_INLINE void Apu::ClockOscillators(const bool twoClocks)
2486 		{
2487 			for (uint i=0; i < 2; ++i)
2488 				square[i].ClockEnvelope();
2489 
2490 			triangle.ClockLinearCounter();
2491 			noise.ClockEnvelope();
2492 
2493 			if (twoClocks)
2494 			{
2495 				for (uint i=0; i < 2; ++i)
2496 					square[i].ClockSweep( i-1 );
2497 
2498 				triangle.ClockLengthCounter();
2499 				noise.ClockLengthCounter();
2500 			}
2501 		}
2502 
ClockDmc(const Cycle target,const uint readAddress)2503 		NST_NO_INLINE void Apu::ClockDmc(const Cycle target,const uint readAddress)
2504 		{
2505 			NST_ASSERT( cycles.dmcClock <= target );
2506 
2507 			do
2508 			{
2509 				if (dmc.ClockDAC())
2510 				{
2511 					Update( cycles.dmcClock );
2512 					dmc.Update();
2513 				}
2514 
2515 				dmc.ClockDMA( cpu, cycles.dmcClock, readAddress );
2516 			}
2517 			while (cycles.dmcClock <= target);
2518 		}
2519 
ClockFrameCounter()2520 		NST_NO_INLINE void Apu::ClockFrameCounter()
2521 		{
2522 			NST_COMPILE_ASSERT( STATUS_SEQUENCE_5_STEP == 0x80 );
2523 			NST_VERIFY( cycles.frameCounter <= cpu.GetCycles() * cycles.fixed );
2524 
2525 			ClockOscillators( cycles.frameDivider & 0x1U );
2526 
2527 			cycles.frameDivider = (cycles.frameDivider + 1) & 0x3U;
2528 			cycles.frameCounter += Cycles::oscillatorClocks[cpu.GetModel()][ctrl >> 7][cycles.frameDivider] * cycles.fixed;
2529 		}
2530 
ClockFrameIRQ(const Cycle target)2531 		NST_NO_INLINE void Apu::ClockFrameIRQ(const Cycle target)
2532 		{
2533 			NST_VERIFY( ctrl == STATUS_FRAME_IRQ_ENABLE );
2534 
2535 			cpu.DoIRQ( Cpu::IRQ_FRAME, cycles.frameIrqClock );
2536 
2537 			Cycle clock = cycles.frameIrqClock;
2538 			uint repeat = cycles.frameIrqRepeat;
2539 
2540 			do
2541 			{
2542 				clock += Cycles::frameClocks[cpu.GetModel()][1 + repeat++ % 3];
2543 			}
2544 			while (clock <= target);
2545 
2546 			cycles.frameIrqClock = clock;
2547 			cycles.frameIrqRepeat = repeat;
2548 		}
2549 
GetSample()2550 		NST_NO_INLINE Apu::Channel::Sample Apu::GetSample()
2551 		{
2552 			dword dac[2];
2553 
2554 			return Clamp<Channel::OUTPUT_MIN,Channel::OUTPUT_MAX>
2555 			(
2556 				dcBlocker.Apply
2557 				(
2558 					(0 != (dac[0] = square[0].GetSample() + square[1].GetSample()) ? NLN_SQ_0 / (NLN_SQ_1 / dac[0] + NLN_SQ_2) : 0) +
2559 					(0 != (dac[1] = triangle.GetSample() + noise.GetSample() + dmc.GetSample()) ? NLN_TND_0 / (NLN_TND_1 / dac[1] + NLN_TND_2) : 0)
2560 				) + (extChannel ? extChannel->GetSample() : 0)
2561 			);
2562 		}
2563 
2564 		NES_POKE_AD(Apu,4000)
2565 		{
2566 			UpdateLatency();
2567 			square[address >> 2 & 0x1].WriteReg0( data );
2568 		}
2569 
2570 		NES_POKE_AD(Apu,4001)
2571 		{
2572 			Update();
2573 			square[address >> 2 & 0x1].WriteReg1( data );
2574 		}
2575 
2576 		NES_POKE_AD(Apu,4002)
2577 		{
2578 			Update();
2579 			square[address >> 2 & 0x1].WriteReg2( data );
2580 		}
2581 
2582 		NES_POKE_AD(Apu,4003)
2583 		{
2584 			square[address >> 2 & 0x1].WriteReg3( data, UpdateDelta() );
2585 		}
2586 
2587 		NES_POKE_D(Apu,4008)
2588 		{
2589 			Update();
2590 			triangle.WriteReg0( data );
2591 		}
2592 
2593 		NES_POKE_D(Apu,400A)
2594 		{
2595 			Update();
2596 			triangle.WriteReg2( data );
2597 		}
2598 
2599 		NES_POKE_D(Apu,400B)
2600 		{
2601 			triangle.WriteReg3( data, UpdateDelta() );
2602 		}
2603 
2604 		NES_POKE_D(Apu,400C)
2605 		{
2606 			UpdateLatency();
2607 			noise.WriteReg0( data );
2608 		}
2609 
2610 		NES_POKE_D(Apu,400E)
2611 		{
2612 			Update();
2613 			noise.WriteReg2( data, cpu.GetModel() );
2614 		}
2615 
2616 		NES_POKE_D(Apu,400F)
2617 		{
2618 			noise.WriteReg3( data, UpdateDelta() );
2619 		}
2620 
2621 		NES_POKE_D(Apu,4010)
2622 		{
2623 			if (!dmc.WriteReg0( data, cpu.GetModel() ))
2624 				cpu.ClearIRQ( Cpu::IRQ_DMC );
2625 		}
2626 
2627 		NES_POKE_D(Apu,4011)
2628 		{
2629 			Update();
2630 			dmc.WriteReg1( data );
2631 		}
2632 
2633 		NES_POKE_D(Apu,4012)
2634 		{
2635 			dmc.WriteReg2( data );
2636 		}
2637 
2638 		NES_POKE_D(Apu,4013)
2639 		{
2640 			dmc.WriteReg3( data );
2641 		}
2642 
2643 		NES_POKE_D(Apu,4015)
2644 		{
2645 			Update();
2646 
2647 			data = ~data;
2648 
2649 			square[0].Disable ( data >> 0 & 0x1  );
2650 			square[1].Disable ( data >> 1 & 0x1  );
2651 			triangle.Disable  ( data >> 2 & 0x1  );
2652 			noise.Disable     ( data >> 3 & 0x1  );
2653 			dmc.Disable       ( data & 0x10, cpu );
2654 		}
2655 
2656 		NES_PEEK_A(Apu,4015)
2657 		{
2658 			NST_COMPILE_ASSERT( Cpu::IRQ_FRAME == 0x40 && Cpu::IRQ_DMC == 0x80 );
2659 
2660 			const Cycle elapsed = cpu.Update( address );
2661 
2662 			if (cycles.frameIrqClock <= elapsed)
2663 				ClockFrameIRQ( elapsed );
2664 
2665 			if (cycles.frameCounter < elapsed * cycles.fixed)
2666 				Update( elapsed );
2667 
2668 			const uint data = cpu.GetIRQ();
2669 			cpu.ClearIRQ( Cpu::IRQ_FRAME );
2670 
2671 			return (data & (Cpu::IRQ_FRAME|Cpu::IRQ_DMC)) |
2672 			(
2673 				( square[0].GetLengthCounter() ? 0x01U : 0x00U ) |
2674 				( square[1].GetLengthCounter() ? 0x02U : 0x00U ) |
2675 				( triangle.GetLengthCounter()  ? 0x04U : 0x00U ) |
2676 				( noise.GetLengthCounter()     ? 0x08U : 0x00U ) |
2677 				( dmc.GetLengthCounter()       ? 0x10U : 0x00U )
2678 			);
2679 		}
2680 
WriteFrameCtrl(uint data)2681 		void Apu::WriteFrameCtrl(uint data)
2682 		{
2683 			Cycle next = cpu.Update();
2684 
2685 			if (cpu.IsOddCycle())
2686 				next += cpu.GetClock();
2687 
2688 			Update( next );
2689 
2690 			if (cycles.frameIrqClock <= next)
2691 				ClockFrameIRQ( next );
2692 
2693 			next += cpu.GetClock();
2694 
2695 			data &= STATUS_BITS;
2696 
2697 			cycles.frameCounter = (next + Cycles::oscillatorClocks[cpu.GetModel()][data >> 7][0]) * cycles.fixed;
2698 			cycles.frameDivider = 0;
2699 			cycles.frameIrqRepeat = 0;
2700 
2701 			ctrl = data;
2702 
2703 			if (data)
2704 			{
2705 				cycles.frameIrqClock = Cpu::CYCLE_MAX;
2706 
2707 				if (data & STATUS_NO_FRAME_IRQ)
2708 					cpu.ClearIRQ( Cpu::IRQ_FRAME );
2709 
2710 				if (data & STATUS_SEQUENCE_5_STEP)
2711 					ClockOscillators( true );
2712 			}
2713 			else
2714 			{
2715 				cycles.frameIrqClock = next + Cycles::frameClocks[cpu.GetModel()][0];
2716 			}
2717 		}
2718 
2719 		NES_PEEK(Apu,40xx)
2720 		{
2721 			return 0x40;
2722 		}
2723 	}
2724 }
2725